You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.3 KiB
38 lines
1.3 KiB
"""Load person JSON files from disk and insert them into ArangoDB.

Every ``*.json`` file in ``folder_path`` is read and its top-level
``"person"`` entry is extracted. Three derived fields are added to each
person (``_key``, ``namn`` and ``aktiv``) and the resulting documents are
inserted into the ``people`` collection in batches of ``BATCH_SIZE``.

Files that cannot be read or parsed are reported on stdout and skipped.
"""

import json
import os
from pprint import pprint

from arango_client import arango

folder_path = "/home/lasse/riksdagen/personer"

# Insert in batches to avoid Payload Too Large error
BATCH_SIZE = 50  # You can adjust this if needed

# Value of the "status" field that marks a person as currently serving.
ACTIVE_STATUS = "Tjänstgörande riksdagsledamot"

people = []
for filename in os.listdir(folder_path):
    if not filename.endswith(".json"):
        continue

    file_path = os.path.join(folder_path, filename)
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            person_data = json.load(file)["person"]
    except (OSError, ValueError, KeyError, TypeError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # KeyError/TypeError cover a document without a usable "person" entry.
        print(f"Error loading {filename}: {e}")
        # Skip this file: without the `continue` the code below would run on
        # an undefined name or on the previous file's data and insert it twice.
        continue

    # The document key is the person's id, stored as a string.
    person_data["_key"] = str(person_data["intressent_id"])

    # Treat missing or null name parts as empty so the display name never
    # contains the literal text "None".
    first_name = person_data.get("tilltalsnamn") or ""
    last_name = person_data.get("efternamn") or ""
    person_data["namn"] = f"{first_name} {last_name}".strip()

    person_data["aktiv"] = person_data.get("status", "") == ACTIVE_STATUS

    people.append(person_data)

print(f"Loaded {len(people)} people")

collection = arango.db.collection("people")

for start in range(0, len(people), BATCH_SIZE):
    batch = people[start : start + BATCH_SIZE]
    print(f"Inserting batch {start // BATCH_SIZE + 1} ({len(batch)} people)...")
    # NOTE(review): with silent=True no per-document result is returned, so
    # individual insert failures (e.g. duplicate keys) presumably go
    # unreported -- confirm against the client library's documentation.
    collection.insert_many(batch, silent=True, return_old=False, return_new=False)

print("All people inserted.")