# electric_cars_project/explore_speakers.py

import pandas as pd
from all_arguments import arguments as arguments_dict  # Arguments dictionary with sentiment information

# Step 1: Read the CSV file
# The file is semicolon-delimited; the path is resolved against the current
# working directory.
df = pd.read_csv('Blad 1-speeches_sep.csv', delimiter=';')

# Quick look at the first rows to sanity-check the parse.
print(df.head())

# Step 2: Extract relevant columns
# Every column from positional index 5 onward is treated as an argument column.
# NOTE(review): the original comment said "5th column", but index 5 is the
# 6th column -- confirm against the CSV layout.
arguments = df.columns[5:]

# Step 3: Create a binary matrix for arguments
# Coerce each argument column to numeric (unparseable cells become NaN, then 0)
# and cast to int. Values are not clipped to 0/1; later code only tests "> 0".
numeric_arguments = (
    df[arguments]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .astype(int)
)

# Build the working frame from the already-converted columns. Assigning the
# converted values back through `.loc[:, arguments] = ...` writes into the
# existing columns and can keep their original (float/object) dtype, which
# would silently undo the int cast; concatenating fresh columns avoids that.
df_arguments = pd.concat([df[['_key', 'name']], numeric_arguments], axis=1)

# Step 4: Calculate sentiment scores for each politician
def calculate_sentiment_score(row):
    """Return the net sentiment of the arguments marked as used in *row*.

    Every column listed in the module-level ``arguments`` whose value in
    *row* is greater than zero counts as used. Its sentiment label is read
    from ``arguments_dict[column]['sentiment']``; a missing entry or a
    missing ``'sentiment'`` key is treated as ``'neutral'``.

    Args:
        row: Mapping-like object indexable by argument column name.

    Returns:
        Number of used ``'positive'`` arguments minus the number of used
        ``'negative'`` arguments. Any other label contributes nothing.
    """
    # Lazily walk the columns so lookups happen in the same order as a loop.
    used_columns = (column for column in arguments if row[column] > 0)
    labels = (
        arguments_dict.get(column, {}).get('sentiment', 'neutral')
        for column in used_columns
    )
    return sum(
        1 if label == 'positive' else -1 if label == 'negative' else 0
        for label in labels
    )

# Score every row: axis=1 passes each row to calculate_sentiment_score.
# NOTE(review): scoring and ranking are per CSV row; if one politician appears
# in several rows, each row is ranked separately -- confirm this is intended.
row_scores = df_arguments.apply(calculate_sentiment_score, axis=1)
df_arguments['sentiment_score'] = row_scores

# Step 5: Identify the top 3 most positive and negative politicians
# Ties are resolved by pandas' default (keep='first').
top_3_positive = df_arguments.nlargest(n=3, columns='sentiment_score')
top_3_negative = df_arguments.nsmallest(n=3, columns='sentiment_score')

# Step 6: Extract arguments used by these politicians
def extract_arguments(df):
    """Map each name in *df* to the argument columns marked as used.

    A column from the module-level ``arguments`` counts as used for a row
    when its value in that row is greater than zero.

    Rows that share the same ``'name'`` are merged rather than overwriting
    one another: the resulting list holds every argument used in any of that
    name's rows, without duplicates, in order of first appearance. When all
    names are unique the result is the per-row list in ``arguments`` order.

    Args:
        df: DataFrame with a ``'name'`` column and one column per entry of
            ``arguments``.

    Returns:
        dict mapping name -> list of used argument column names. Empty when
        *df* has no rows.
    """
    result = {}
    for _, row in df.iterrows():
        # setdefault keeps the list from earlier rows with the same name.
        used_arguments = result.setdefault(row['name'], [])
        for arg in arguments:
            if row[arg] > 0 and arg not in used_arguments:
                used_arguments.append(arg)
    return result

# Collect the arguments used by each of the selected rows, keyed by name.
positive_arguments = extract_arguments(top_3_positive)
negative_arguments = extract_arguments(top_3_negative)

# Print the results: a heading per group, then one "name: [arguments]" line
# per entry. The second heading starts with a newline to separate the groups.
report_sections = (
    ("Top 3 Positive Politicians and their Arguments:", positive_arguments),
    ("\nTop 3 Negative Politicians and their Arguments:", negative_arguments),
)
for heading, arguments_by_name in report_sections:
    print(heading)
    for name, args in arguments_by_name.items():
        print(f"{name}: {args}")