You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1.3 KiB
31 lines
1.3 KiB
import csv

from _arango import ArangoDB
|
|
|
# Export every speech returned by ArangoDB.all_ev_speeches() to
# ``speeches.csv``: one semicolon-separated row per speech with its key, name,
# party, text and LLM summary, followed by one 0/1 indicator column for each
# distinct normalized argument found across all speeches.

arango = ArangoDB()
speeches = list(arango.all_ev_speeches())

# Distinct argument labels over all speeches. Sorting makes the column order
# reproducible between runs (the iteration order of a set is not).
# NOTE(review): assumes the labels are strings (hashable and mutually
# comparable) -- confirm against the collection schema.
normalized_arguments = sorted(
    {argument for speech in speeches for argument in speech["normalized_arguments"]}
)

for argument in normalized_arguments:
    print(argument)

# Mode "w" truncates any previous export. newline="" is what the csv module
# expects so that it controls line endings and embedded newlines itself.
with open("speeches.csv", "w", newline="", encoding="utf-8") as f:
    # QUOTE_NONNUMERIC quotes every string field (doubling embedded quotes)
    # and leaves the integer 0/1 flags bare, which is the layout the
    # hand-written export produced.
    writer = csv.writer(
        f,
        delimiter=";",
        quoting=csv.QUOTE_NONNUMERIC,
        lineterminator="\n",
    )

    # Header row
    writer.writerow(
        ["_key", "name", "party", "text", "llm summary", *normalized_arguments]
    )

    for speech in speeches:
        # Build the lookup once per speech instead of scanning the list for
        # every argument column.
        present = set(speech["normalized_arguments"])

        # NOTE(review): quoting already protects embedded ';', so the lossy
        # ';' -> ',' replacement below is kept only to leave the exported text
        # unchanged from earlier runs -- confirm whether any consumer needs it.
        text = speech["text"].replace(";", ",")
        summary = speech["llm_summary"].replace(";", ",")

        writer.writerow(
            [
                # str() keeps these fields quoted even if a value is numeric.
                str(speech["_key"]),
                str(speech["name"]),
                str(speech["party"]),
                text,
                summary,
                # int() rather than bool so the flag is written as 1/0.
                *(int(argument in present) for argument in normalized_arguments),
            ]
        )
|
|
|
|