@ -8,9 +8,7 @@ import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from arango_things import arango_db
from things import normalize_party_names
from streamlit_info import (
party_colors ,
explainer ,
@ -23,7 +21,7 @@ from streamlit_info import (
)
from langchain . schema import Document
from llama_server import LLM
class Params :
""" Class containing parameters for URL.
@ -171,7 +169,8 @@ def summarize(df_party, user_input):
{ texts }
Please make a very short summary of the standpoints of the { party } party in the EU parliament , in relation to the user search . Make note of the speaker and the dates for the speeches .
You are a journalist and are going to write a short summary of the standpoints of the { party } party in the EU parliament , in relation to the user search .
Make note of the speaker and the dates for the speeches .
Example 1 :
In 2020 , Parliamentarian NN , ( { party } ) wanted to decrease the budget for the EU parliament . Later , 2022 , she wanted to incrase the budget .
@ -185,11 +184,48 @@ def summarize(df_party, user_input):
# Generate a summary of the party's standpoints.
# Return the summaries dictionary.
system_prompt = " You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament. "
# TODO: Figure out how system_prompt is handled in llama_server.
#system_prompt = "You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament."
return ollama (
prompt = prompt , system_prompt = system_prompt , temperature = 0.5 , print_tokens = False
)
return llama . generate ( prompt = prompt )
# Maps every party label variant found in the source data to the canonical
# party code used in the database. Built once at import time instead of on
# every call.
_PARTY_NAME_ALIASES = {
    "EPP": "EPP",
    "PPE": "EPP",
    "RE": "Renew",
    "Renew": "Renew",
    "S-D": "S&D",
    "S&D": "S&D",
    "ID": "ID",
    "ECR": "ECR",
    "GUE/NGL": "GUE/NGL",
    "The Left": "GUE/NGL",
    "Greens/EFA": "Greens/EFA",
    "G/EFA": "Greens/EFA",
    "Verts/ALE": "Greens/EFA",
    # Labels that all mean "no party group".
    "NA": "NA",
    "NULL": "NA",
    None: "NA",
    "-": "NA",
    "Vacant": "NA",
    "NI": "NA",
}


def normalize_party_names(name):
    """
    Normalizes party names to the format used in the database.

    Parameters:
        name (str | None): The party name to be normalized. Surrounding
            whitespace is ignored. ``None`` and float NaN (a missing value
            in a pandas column) are treated as "no party".

    Returns:
        str: The normalized party name.

    Raises:
        KeyError: If the name is not a known party label.
    """
    if isinstance(name, float) and name != name:
        # NaN is the only float that is not equal to itself; treat it like
        # the other "missing" markers (None, "NULL", "-").
        name = None
    elif isinstance(name, str):
        name = name.strip()

    try:
        return _PARTY_NAME_ALIASES[name]
    except KeyError:
        # Same exception type as a plain dict lookup, but say what was wrong.
        raise KeyError(f"Unknown party name: {name!r}") from None
def make_snippet ( text , input_tokens , token_text_list , token_input_list ) :
@ -214,7 +250,7 @@ def make_snippet(text, input_tokens, token_text_list, token_input_list):
snippet = [ ]
text_lower = text . lower ( )
# Calculate snippet length in words.
snippet_length = 40 * int ( 8 / len ( input_tokens ) ) # * Change to another value?
snippet_length = 40 * int ( 10 / len ( input_tokens ) + 1 ) # * Change to another value?
# Loop through each input token.
for token in input_tokens :
@ -635,6 +671,9 @@ partycodes = list(party_colors.keys()) # List of partycodes
# Max hits returned by db.
return_limit = 10000
# Initialize LLM model.
llama = LLM ( temperature = 0.5 )
# Ask for word to search for.
user_input = st . text_input (
" " ,
@ -644,18 +683,17 @@ user_input = st.text_input(
help = ' You can use asterix (*), minus (-), quotationmarks ( " " ) and OR. ' ,
)
if len ( user_input ) > 3 :
params . q = user_input
# print(user_input.upper())
# print(o llama(prompt=f'''A user wants to search in a database containing debates in the European Parliament and have made the input below. Take that input and write three questions would generate a good result if used for quering a vector database. Answer with a python style list containing the three questions.
# print(llama.generate (prompt=f'''A user wants to search in a database containing debates in the European Parliament and have made the input below. Take that input and write three questions would generate a good result if used for quering a vector database. Answer with a python style list containing the three questions.
# User input: {user_input}
# Questions: '''))
try : #! When in proc ution, uncomment this.
try : #! When in prod ution, uncomment this.
user_input = user_input . replace ( " ' " , ' " ' )
input_tokens = re . findall ( r ' (?: " [^ " ]* " | \ S)+ ' , user_input )
@ -811,10 +849,15 @@ if len(user_input) > 3:
st . session_state [ " hits " ] = df . shape [ 0 ]
else :
if st . session_state [ " hits " ] != df . shape [ 0 ] :
del st . session_state [ " df_excerpts " ]
del st . session_state [ " excerpt_page " ]
del st . session_state [ " text_next_page_button " ]
if " df_excerpts " in st . session_state :
del st . session_state [ " df_excerpts " ]
if " excerpt_page " in st . session_state :
del st . session_state [ " excerpt_page " ]
if ' text_next_page_button ' in st . session_state :
del st . session_state [ " text_next_page_button " ]
del st . session_state [ " disable_next_page_button " ]
if ' disable_next_page_button ' in st . session_state :
del st . session_state [ " disable_next_page_button " ]
st . session_state [ " hits " ] = df . shape [ 0 ]
##! Show snippets.
@ -871,9 +914,8 @@ if len(user_input) > 3:
party_talks = pd . DataFrame ( df_ [ " Party " ] . value_counts ( ) )
party_labels = party_talks . index . to_list ( )
fig , ax1 = plt . subplots ( )
total = party_talks [ " Party " ] . sum ( )
mentions = party_talks [ " Party " ] #!
total = party_talks [ " count " ] . sum ( )
mentions = party_talks [ " count " ] #!
ax1 . pie (
mentions ,
labels = party_labels ,
@ -904,7 +946,7 @@ if len(user_input) > 3:
. mark_bar ( )
. encode (
x = " Year " ,
y = " Mentions " ,
y = " count " ,
color = alt . Color ( " color " , scale = None ) ,
tooltip = [ " Party " , " Mentions " ] ,
)