import pandas as pd

# Arguments dictionary with sentiment information
from all_arguments import arguments as arguments_dict

# Step 1: Read the CSV file (semicolon-separated)
df = pd.read_csv('Blad 1-speeches_sep.csv', delimiter=';')
print(df.head())

# Step 2: Extract relevant columns
# Every column from positional index 5 (i.e. the sixth column) onwards is
# treated as an argument column.
# NOTE(review): an earlier comment said "5th column"; the slice below starts
# at the sixth. Confirm the offset against the actual CSV layout.
arguments = df.columns[5:]
# .copy() gives an independent frame, so the conversions below never write
# through to (or warn about) the original `df`.
df_arguments = df.loc[:, ['_key', 'name'] + list(arguments)].copy()

# Step 3: Create a binary matrix for arguments
# Convert the argument columns to integers; anything that cannot be parsed as
# a number becomes 0. Plain column assignment (rather than `.loc[:, cols] =`)
# replaces the columns outright, so the integer dtype is actually kept.
df_arguments[arguments] = (
    df_arguments[arguments]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .astype(int)
)


# Step 4: Calculate sentiment scores for each row
def calculate_sentiment_score(row):
    """Return the net sentiment score for one row of ``df_arguments``.

    For every argument column whose value in ``row`` is greater than zero,
    the score is increased by 1 when ``arguments_dict`` labels that argument
    ``'positive'`` and decreased by 1 when it labels it ``'negative'``.
    Arguments that are missing from ``arguments_dict``, have no
    ``'sentiment'`` entry, or carry any other label do not change the score.

    Args:
        row: A row of ``df_arguments`` (indexable by argument column name).

    Returns:
        The integer count of positive arguments minus negative arguments.
    """
    score = 0
    for arg in arguments:
        if row[arg] > 0:
            # presumably each value in arguments_dict is itself a dict with a
            # 'sentiment' key; verify against all_arguments
            sentiment = arguments_dict.get(arg, {}).get('sentiment', 'neutral')
            if sentiment == 'positive':
                score += 1
            elif sentiment == 'negative':
                score -= 1
    return score


df_arguments['sentiment_score'] = df_arguments.apply(calculate_sentiment_score, axis=1)

# Step 5: Identify the 3 rows with the highest and lowest sentiment scores
top_3_positive = df_arguments.nlargest(3, 'sentiment_score')
top_3_negative = df_arguments.nsmallest(3, 'sentiment_score')


# Step 6: Extract arguments used by these politicians
def extract_arguments(df):
    """Map each row's ``name`` to the argument columns it uses.

    An argument counts as used when its value in the row is greater than zero.

    Args:
        df: A frame containing a ``'name'`` column and every column listed in
            the module-level ``arguments``.

    Returns:
        A dict from name to the list of used argument column names, in column
        order.
    """
    result = {}
    for _, row in df.iterrows():
        name = row['name']
        used_arguments = [arg for arg in arguments if row[arg] > 0]
        # NOTE(review): rows sharing the same name overwrite each other here,
        # so only the last such row's arguments are kept. Confirm whether
        # names are unique in the input.
        result[name] = used_arguments
    return result


positive_arguments = extract_arguments(top_3_positive)
negative_arguments = extract_arguments(top_3_negative)

# Print the results
print("Top 3 Positive Politicians and their Arguments:")
for name, args in positive_arguments.items():
    print(f"{name}: {args}")

print("\nTop 3 Negative Politicians and their Arguments:")
for name, args in negative_arguments.items():
    print(f"{name}: {args}")