Made working with llama.cpp

local-streamlit
lasseedfast 2 years ago
parent 686dd2ae8f
commit 275ae9ee13
      streamlit_app_talking_ep.py

@@ -8,9 +8,7 @@ import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from arango_things import arango_db
from things import normalize_party_names
from streamlit_info import (
party_colors,
explainer,
@@ -23,7 +21,7 @@ from streamlit_info import (
)
from langchain.schema import Document
from llama_server import LLM
class Params:
"""Class containing parameters for URL.
@@ -171,7 +169,8 @@ def summarize(df_party, user_input):
{texts}
Please make a very short summary of the standpoints of the {party} party in the EU parliament, in relation to the user search. Make note of the speaker and the dates for the speeches.
You are a journalist and are going to write a short summary of the standpoints of the {party} party in the EU parliament, in relation to the user search.
Make note of the speaker and the dates for the speeches.
Example 1:
In 2020, Parliamentarian NN ({party}) wanted to decrease the budget for the EU parliament. Later, in 2022, she wanted to increase the budget.
@@ -185,11 +184,48 @@ def summarize(df_party, user_input):
# Generate a summary of the party's standpoints.
# Return the summaries dictionary.
system_prompt = "You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament."
return ollama(
prompt=prompt, system_prompt=system_prompt, temperature=0.5, print_tokens=False
)
# TODO: Have to understand system_prompt in llama_server
#system_prompt = "You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament."
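# A possible interim workaround (untested sketch, assuming generate() only takes prompt=)
# would be to fold the system instructions into the prompt itself, e.g.
# return llama.generate(prompt=f"{system_prompt}\n\n{prompt}")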
return llama.generate(prompt=prompt)
def normalize_party_names(name):
"""
Normalizes party names to the format used in the database.
Parameters:
name (str): The party name to be normalized.
Returns:
str: The normalized party name.
"""
parties = {
"EPP": "EPP",
"PPE": "EPP",
"RE": "Renew",
"S-D": "S&D",
"S&D": "S&D",
"ID": "ID",
"ECR": "ECR",
"GUE/NGL": "GUE/NGL",
"The Left": "GUE/NGL",
"Greens/EFA": "Greens/EFA",
"G/EFA": "Greens/EFA",
"Verts/ALE": "Greens/EFA",
"NA": "NA",
"NULL": "NA",
None: "NA",
"-": "NA",
"Vacant": "NA",
"NI": "NA",
"Renew": "Renew"
}
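# Group names missing from the mapping above will raise a KeyError here.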
return parties[name]
def make_snippet(text, input_tokens, token_text_list, token_input_list):
@@ -214,7 +250,7 @@ def make_snippet(text, input_tokens, token_text_list, token_input_list):
snippet = []
text_lower = text.lower()
# Calculate snippet length in words.
snippet_length = 40 * int(8 / len(input_tokens)) # * Change to another value?
snippet_length = 40 * int(10 / len(input_tokens)+1) # * Change to another value?
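# Quick check of the formula (int() wraps the whole sum): 1 token -> 40 * int(10/1 + 1) = 440 words,
# 4 tokens -> 40 * int(10/4 + 1) = 120, and from 11 tokens upwards it bottoms out at 40 * 1 = 40.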
# Loop through each input token.
for token in input_tokens:
@@ -635,6 +671,9 @@ partycodes = list(party_colors.keys()) # List of partycodes
# Max hits returned by db.
return_limit = 10000
# Initialize LLM model.
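# LLM comes from llama_server and is the llama.cpp-backed replacement for the earlier ollama() calls.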
llama = LLM(temperature=0.5)
# Ask for word to search for.
user_input = st.text_input(
" ",
@@ -644,18 +683,17 @@ user_input = st.text_input(
help='You can use asterisk (*), minus (-), quotation marks ("") and OR.',
)
if len(user_input) > 3:
params.q = user_input
# print(user_input.upper())
# print(ollama(prompt=f'''A user wants to search in a database containing debates in the European Parliament and have made the input below. Take that input and write three questions would generate a good result if used for quering a vector database. Answer with a python style list containing the three questions.
# print(llama.generate(prompt=f'''A user wants to search in a database containing debates in the European Parliament and has made the input below. Take that input and write three questions that would generate a good result if used for querying a vector database. Answer with a Python-style list containing the three questions.
# User input: {user_input}
# Questions: '''))
try: #! When in procution, uncomment this.
try: #! When in production, uncomment this.
user_input = user_input.replace("'", '"')
input_tokens = re.findall(r'(?:"[^"]*"|\S)+', user_input)
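# The regex keeps quoted phrases (e.g. "climate change") together as single tokens
# and otherwise splits the input on whitespace.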
@@ -811,10 +849,15 @@ if len(user_input) > 3:
st.session_state["hits"] = df.shape[0]
else:
if st.session_state["hits"] != df.shape[0]:
del st.session_state["df_excerpts"]
del st.session_state["excerpt_page"]
del st.session_state["text_next_page_button"]
if "df_excerpts" in st.session_state:
del st.session_state["df_excerpts"]
if "excerpt_page" in st.session_state:
del st.session_state["excerpt_page"]
if 'text_next_page_button' in st.session_state:
del st.session_state["text_next_page_button"]
del st.session_state["disable_next_page_button"]
if 'disable_next_page_button' in st.session_state:
del st.session_state["disable_next_page_button"]
st.session_state["hits"] = df.shape[0]
##! Show snippets.
@@ -871,9 +914,8 @@ if len(user_input) > 3:
party_talks = pd.DataFrame(df_["Party"].value_counts())
party_labels = party_talks.index.to_list()
fig, ax1 = plt.subplots()
total = party_talks["Party"].sum()
mentions = party_talks["Party"] #!
total = party_talks["count"].sum()
mentions = party_talks["count"] #!
ax1.pie(
mentions,
labels=party_labels,
@@ -904,7 +946,7 @@ if len(user_input) > 3:
.mark_bar()
.encode(
x="Year",
y="Mentions",
y="count",
color=alt.Color("color", scale=None),
tooltip=["Party", "Mentions"],
)
