You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

31 lines
1.3 KiB

import csv

from _arango import ArangoDB
# Export every speech returned by `arango.all_ev_speeches()` to
# 'speeches.csv': a semicolon-delimited file with the speech's key, name,
# party, text and LLM summary, followed by one 0/1 indicator column per
# distinct normalized argument found across all speeches.
arango = ArangoDB()
speeches = list(arango.all_ev_speeches())

# Distinct argument labels across all speeches. They are sorted so the column
# order is stable between runs (plain set iteration order varies with string
# hash randomization). Assumes the labels are strings -- TODO confirm.
normalized_arguments = sorted(
    {
        argument
        for speech in speeches
        for argument in speech['normalized_arguments']
    }
)
for argument in normalized_arguments:
    print(argument)

# Mode 'w' truncates any previous export; newline='' hands line-ending
# control to the csv module, as its documentation recommends.
with open('speeches.csv', 'w', newline='') as f:
    # QUOTE_NONNUMERIC quotes every string cell (doubling embedded double
    # quotes) and leaves the integer 0/1 flags bare, so text cells are always
    # enclosed in quotes while the indicator columns stay plain numbers.
    writer = csv.writer(
        f, delimiter=';', quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n'
    )

    # Header row
    writer.writerow(
        ['_key', 'name', 'party', 'text', 'llm summary', *normalized_arguments]
    )

    for speech in speeches:
        # Build the lookup set once per speech instead of scanning the
        # speech's argument list for every column.
        present = set(speech['normalized_arguments'])
        writer.writerow(
            [
                str(speech['_key']),
                str(speech['name']),
                str(speech['party']),
                # Semicolons in free text are flattened to commas so that
                # consumers splitting naively on ';' still see the right
                # number of columns.
                speech['text'].replace(';', ','),
                speech['llm_summary'].replace(';', ','),
                # One flag per argument column, in header order; no trailing
                # delimiter, so data rows line up with the header.
                *(int(argument in present) for argument in normalized_arguments),
            ]
        )