You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
239 lines
9.1 KiB
239 lines
9.1 KiB
from _llm import LLM |
|
from collections import Counter |
|
from dotenv import load_dotenv |
|
from _arango import ArangoDB |
|
from all_arguments import arguments as all_arguments |
|
from colorprinter.print_color import * |
|
import matplotlib.pyplot as plt |
|
from sklearn.cluster import KMeans |
|
from sklearn.manifold import TSNE |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
|
|
|
|
|
|
def find_argument(argument, candidates=None):
    """Return the first known argument category whose name contains *argument*.

    Matching is a case-insensitive substring test of the trimmed
    ``argument`` against each category name, in iteration order.

    Args:
        argument: Free-text category name (e.g. a raw LLM answer).
            Surrounding whitespace is ignored.
        candidates: Optional iterable of category names to search. When
            omitted, the keys of the module-level ``all_arguments`` mapping
            are used.

    Returns:
        The matching category name exactly as spelled in the searched
        collection, or ``None`` if nothing matches or ``argument`` is blank.
    """
    needle = (argument or "").strip().lower()
    if not needle:
        # "" is a substring of every key, so a blank answer would otherwise
        # be silently mapped to the first category.
        return None

    keys = all_arguments if candidates is None else candidates
    for key in keys:
        if needle in key.lower():
            return key
    return None
|
|
|
|
|
def extract_arguments(speeches):
    """Extract electric-car arguments from each speech and store them.

    For every speech the LLM is asked to list the arguments found in
    ``speech["text"]``, one per line. Each non-empty line is then sent back
    to the same ``LLM`` instance to be rewritten in a more general form.
    The two lists are stored on the speech under ``"arguments"`` and
    ``"general_arguments"`` and the speech is saved with
    ``arango.update_ev_document``.

    Args:
        speeches: Iterable of speech documents with a ``"text"`` key. Each
            document is modified in place.
    """
    for speech in speeches:
        # A fresh LLM per speech: the generalisation prompt refers to "the
        # speech earlier", so it presumably relies on chat history kept by
        # this instance (NOTE(review): confirm LLM(chat=True) semantics).
        llm = LLM(chat=True)

        # Get the speech text
        text = speech["text"]

        # Make a prompt asking for arguments in the text about electric cars
        prompt = f'''Below is a speech in the European Union. Please provide arguments in the text about electric cars.\n
"""{text}"""\n
What arguments are there in the text? An argument should be for or against something related to electric cars, or neutral. It should not be a political proposal like "we need electric cars".
Answer ONLY with the arguments, no explanations, greetings or other text.
Make the argument as detailed as possible so it is possible to understand why the argument is for or against electric cars.
If there are no arguments, answer only with "None".
Answer with one argument per line.
'''

        # Generate arguments
        raw_answer = llm.generate(prompt)
        print(raw_answer)

        # Named differently from the module-level ``all_arguments`` catalogue
        # so the import is not shadowed inside this function.
        extracted_arguments = []
        general_arguments = []
        for line in raw_answer.split("\n"):
            argument = line.strip()
            # Skip blank lines (LLM replies often contain them) and the
            # explicit "None" marker requested in the prompt.
            if not argument or "None" in argument:
                continue

            prompt = f"""Based on the speech earlier, make this argument more general and less specific, this will make it easier to compare the arguments with other arguments.
"{argument}"
Generalize the argument as much as possible, so it can be compared with other arguments. Answer ONLY with the generalized argument, no explanations, greetings or other text.
"""
            extracted_arguments.append(argument)
            general_argument = llm.generate(prompt)
            general_arguments.append(general_argument)
            print(">", general_argument)

        # Update the document with the arguments
        speech["arguments"] = extracted_arguments
        speech["general_arguments"] = general_arguments
        arango.update_ev_document(speech)
        print("---")
|
|
|
|
|
def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line, including an optional language tag.
        newline_at = cleaned.find("\n")
        cleaned = cleaned[newline_at + 1:] if newline_at != -1 else cleaned[3:]
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def categorize_arguments(arguments_string):
    """Ask GPT-4o to condense the collected arguments into a JSON catalogue.

    The model's reply is printed, parsed as JSON and written to
    ``arguments.json`` in the current working directory.

    Args:
        arguments_string: The collected arguments as one text block; it is
            inserted verbatim into the prompt.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is not
            set.
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    import json
    import os

    from openai import OpenAI

    # SECURITY: the API key used to be a literal in this file. Any key that
    # was ever committed must be treated as leaked and revoked. The key is
    # now taken from the environment instead.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it (or load it from a .env "
            "file) before calling categorize_arguments()."
        )
    client = OpenAI(api_key=api_key)

    # The JSON example below was repaired: the second entry previously had
    # its keys misplaced and an unbalanced brace, which showed the model an
    # invalid target format.
    prompt = f'''I'm collecting arguments for and against electric cars. Here are the list of arguments I have collected so far:
"""
{arguments_string}
"""
I want to make the list much shorter, combining similar arguments into one argument (and the arguments thereby becoming less specific).
Can you help me make the list shorter and make it into JSON data like the one below?
{{
"Problematic Resource Extraction": {{"argument": "The extraction and processing of raw materials for batteries (e.g., lithium, cobalt) cause serious environmental damage and involve toxic substances. Additionally, mining often involves significant social issues such as child labor and poor working conditions in developing countries.", "sentiment": "negative"}},
"Lack of Affordability and Accessibility": {{"argument": "The shift to electric vehicles (EVs) could make car ownership less affordable for low-income individuals, especially given the current lack of charging infrastructure in rural areas.", "sentiment": "negative"}}
}}
I want to answer as a clean JSON text string, nothing else (as a will load the JSON data into a Python dictionary later on).
I'm especially interested in arguments around climate, CO2 and environmental impact, and these can be more detailed than other arguments and not as general.
Sometimes there are arguments both for and against electric cars within the same area of discussion (e.g., CO2 emissions). In these cases, formulate one more positive/supporting argument and one more negative/critical argument, like {{'Low CO2 emissions per km': '...', 'High Co2 emissions during production': '...'}} (but formulate them yourself, don't take this example as it is).
'''

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gpt-4o",
    )

    answer = chat_completion.choices[0].message.content
    print(answer)

    # Models frequently wrap JSON in a ```json fence even when asked not to;
    # remove it so json.loads does not fail on otherwise valid output.
    data = json.loads(_strip_code_fence(answer))

    # Export to JSON file
    with open("arguments.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
|
|
|
|
|
def normalize_arguments():
    """Assign each extracted argument of each speech to a catalogue category.

    Reads the module-level ``speeches`` list and ``arguments4prompt`` text.
    For every entry in ``speech["arguments"]`` the LLM is asked to pick a
    category; the answer is resolved with ``find_argument``. Resolved
    categories are stored in ``speech["normalized_arguments"]`` and the
    speech is saved with ``arango.update_ev_document``.

    Arguments that look like placeholders (containing "no argument",
    "no input" or a parenthesis) are skipped. An answer containing "None"
    means "no fitting category" and the argument is dropped. Up to four
    attempts are made per argument before giving up.
    """
    max_attempts = 4

    for speech in speeches:
        normalized_arguments = []
        for argument in speech["arguments"]:
            lowered = argument.lower()
            if "no argument" in lowered or "no input" in lowered or "(" in argument:
                continue

            # Created only for arguments that are actually sent to the model.
            llm = LLM(chat=True)
            prompt = f'''Below is a speech in the European Union:\n
"""{speech["text"]}"""\n
The following argument has been extracted from the speech:\n
{argument}\n
Please categorize the argument by choosing the most suitable category from the list below:\n
{arguments4prompt}
If the argument does not fit any of the categories, please choose "None".\n
Answer ONLY with the category, no explanations, greetings or other text.
'''

            category = None
            for _ in range(max_attempts):
                answer = llm.generate(prompt)
                if "None" in answer:
                    print("None")
                    break

                category = find_argument(answer)
                if category:
                    break

                # Show the answer that could not be matched (the lookup
                # result itself is always empty at this point).
                print('Error:', answer)
                prompt += "\nPlease choose a category from the provided list, and answer EXACTLY as it is written in the list."

            if category:
                print(category)
                normalized_arguments.append(category)

        speech["normalized_arguments"] = normalized_arguments
        arango.update_ev_document(speech)
|
|
|
|
|
|
|
# Load all speeches once; the functions above and the report below use them.
arango = ArangoDB()
speeches = list(arango.all_ev_speeches())
print('Number of speeches:', len(speeches))

# Collect the generalized arguments of all speeches, dropping placeholder
# answers (those containing "no argument", "no input" or a parenthesis).
arguments = []
for speech in speeches:
    # Speeches without a "general_arguments" entry are skipped instead of
    # raising KeyError.
    if "general_arguments" not in speech:
        continue
    for argument in speech["general_arguments"]:
        lowered = argument.lower()
        if "no argument" in lowered or "no input" in lowered or "(" in argument:
            continue
        arguments.append(argument)

# One "-" bullet per argument; the dash is now also put on the first item.
arguments_string = "\n".join(f"-{argument}" for argument in arguments)

# Category list for the categorization prompt, one "- NAME: description"
# line per catalogue entry (the stray closing parenthesis was removed).
arguments4prompt = "".join(
    f'- {argument.upper()}: {values["argument"]}\n'
    for argument, values in all_arguments.items()
)
|
|
|
#normalize_arguments() |
|
|
|
|
|
# Aggregate the normalized arguments:
#   arguments          category -> list of catalogue entries, one per use,
#                      each tagged with the speaker's name
#   speakers           speaker name -> net score (+1 per positive argument,
#                      -1 per any other)
#   speakers_arguments speaker name -> list of categories used
arguments = {}
speakers = {}
speakers_arguments = {}
for speech in speeches:
    if 'normalized_arguments' not in speech:
        continue
    speaker_name = speech['name']
    for arg in speech["normalized_arguments"]:
        # Copy the catalogue entry before tagging it. Writing 'name' into
        # all_arguments[arg] directly would mutate the shared catalogue and
        # make every list element the same object, so all uses would show
        # the last speaker.
        argd = dict(all_arguments[arg], name=speaker_name)
        arguments.setdefault(arg, []).append(argd)

        # NOTE(review): every sentiment other than 'positive' (including a
        # possible 'neutral') lowers the score — confirm this is intended.
        if argd['sentiment'] == 'positive':
            speakers[speaker_name] = speakers.get(speaker_name, 0) + 1
        else:
            speakers[speaker_name] = speakers.get(speaker_name, 0) - 1

        speakers_arguments.setdefault(speaker_name, []).append(arg)
|
|
|
# Rank the used categories as (category, number of uses, sentiment) tuples,
# most frequently used first.
sorted_arguments = sorted(
    (
        (argument, len(usage), all_arguments[argument]['sentiment'])
        for argument, usage in arguments.items()
    ),
    key=lambda entry: entry[1],
    reverse=True,
)

print('\n\nArguments:\n')

# Split the ranking by sentiment, keeping the ranking order within each group.
positive_arguments = []
negative_arguments = []
for argument, usage, sentiment in sorted_arguments:
    if sentiment == 'positive':
        positive_arguments.append((argument, usage))
    elif sentiment == 'negative':
        negative_arguments.append((argument, usage))

# Supporting arguments in green first, critical ones in red after.
for argument, usage in positive_arguments:
    print_green('+ ', argument, usage)

for argument, usage in negative_arguments:
    print_red('- ', argument, usage)
|
|
|
|
|
# Speakers ordered by net score, most positive first.
sorted_speakers = sorted(speakers.items(), key=lambda item: item[1], reverse=True)

# Show the five most positive and five most negative speakers. With ten or
# fewer speakers the two slices would overlap and print speakers twice, so
# the whole list is shown once instead.
if len(sorted_speakers) > 10:
    top_and_bottom_speakers = sorted_speakers[:5] + sorted_speakers[-5:]
else:
    top_and_bottom_speakers = list(sorted_speakers)

print('\n\nSpeakers:\n')
for speaker, sentiment in top_and_bottom_speakers:
    if sentiment > 0:
        print_green('+ ', speaker, sentiment)
    elif sentiment < 0:
        print_red('- ', speaker, sentiment)
    else:
        print_yellow('0 ', speaker, sentiment)

print()
|
|
|
|