"""Export all EV speeches to ``speeches.csv`` as a semicolon-delimited table.

Each row holds the speech key, speaker name, party, full text and LLM
summary, followed by one 0/1 column per distinct normalized argument that
tells whether the speech uses that argument.
"""
import csv

from _arango import ArangoDB

OUTPUT_PATH = 'speeches.csv'
BASE_COLUMNS = ['_key', 'name', 'party', 'text', 'llm summary']

arango = ArangoDB()
# NOTE(review): each speech is assumed to be a dict-like record with the keys
# '_key', 'name', 'party', 'text', 'llm_summary' and 'normalized_arguments'
# (an iterable of strings) -- confirm against ArangoDB.all_ev_speeches().
speeches = list(arango.all_ev_speeches())

# Distinct arguments across all speeches. Sorted so the column order is
# stable between runs (plain set iteration order is not).
normalized_arguments = sorted(
    {
        argument
        for speech in speeches
        for argument in speech['normalized_arguments']
    }
)

for argument in normalized_arguments:
    print(argument)

# 'w' truncates the file on open; newline='' lets the csv module control line
# endings, and an explicit encoding keeps non-ASCII speech text portable.
with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as f:
    # QUOTE_NONNUMERIC quotes every string field (doubling embedded quotes)
    # and leaves the integer 0/1 flags bare.
    writer = csv.writer(
        f,
        delimiter=';',
        quoting=csv.QUOTE_NONNUMERIC,
        lineterminator='\n',
    )

    # Header row
    writer.writerow([*BASE_COLUMNS, *normalized_arguments])

    for speech in speeches:
        # Semicolons in the free-text fields are still turned into commas so
        # consumers that split naively on ';' keep working; quote escaping is
        # handled by the csv writer.
        text = speech['text'].replace(';', ',')
        summary = speech['llm_summary'].replace(';', ',')

        # Build the lookup once per speech instead of scanning per argument.
        used_arguments = set(speech['normalized_arguments'])
        flags = [
            1 if argument in used_arguments else 0
            for argument in normalized_arguments
        ]

        # str() keeps key/name/party quoted as text regardless of their type.
        writer.writerow([
            str(speech['_key']),
            str(speech['name']),
            str(speech['party']),
            text,
            summary,
            *flags,
        ])