You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

38 lines
1.3 KiB

import os
import json
from pprint import pprint
from arango_client import arango
# Directory that is scanned for *.json person files.
folder_path = "/home/lasse/riksdagen/personer"

# Read every JSON file, take its top-level "person" object, add the derived
# fields (_key, namn, aktiv) and collect the resulting documents.
people = []
for filename in os.listdir(folder_path):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(folder_path, filename)
    with open(file_path, "r", encoding="utf-8") as file:
        try:
            person_data = json.load(file)["person"]
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers malformed JSON and undecodable bytes;
            # KeyError/TypeError cover a missing "person" key or a top-level
            # value that is not an object.
            print(f"Error loading {filename}: {e}")
            # Skip this file: without the continue, the code below would
            # either hit an undefined name or re-append the previous person.
            continue

    # The intressent_id doubles as the document key.
    person_data["_key"] = str(person_data["intressent_id"])

    # Build the display name; treat missing or null parts as empty so the
    # literal text "None" never ends up in the stored name.
    first_name = person_data.get("tilltalsnamn") or ""
    last_name = person_data.get("efternamn") or ""
    person_data["namn"] = f"{first_name} {last_name}".strip()

    # A person is flagged active only when the status matches exactly.
    person_data["aktiv"] = (
        person_data.get("status", "") == "Tjänstgörande riksdagsledamot"
    )
    people.append(person_data)

print(f"Loaded {len(people)} people")

# Insert in batches to avoid Payload Too Large error
BATCH_SIZE = 50  # You can adjust this if needed
collection = arango.db.collection("people")
for i in range(0, len(people), BATCH_SIZE):
    batch = people[i : i + BATCH_SIZE]
    print(f"Inserting batch {i//BATCH_SIZE + 1} ({len(batch)} people)...")
    # NOTE(review): with silent=True, per-document failures (e.g. duplicate
    # _key) are presumably not reported back -- confirm against the client docs.
    collection.insert_many(batch, silent=True, return_old=False, return_new=False)
print("All people inserted.")