Made working with llama.cpp

2 years ago · 275ae9ee13
parent 686dd2ae8f
commit 275ae9ee13
1 changed files with 61 additions and 19 deletions
--- a/streamlit_app_talking_ep.py
+++ b/streamlit_app_talking_ep.py
@ -8,9 +8,7 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import streamlit as st

-
 from arango_things import arango_db
-from things import normalize_party_names
 from streamlit_info import (
    party_colors,
    explainer,
@ -23,7 +21,7 @@ from streamlit_info import (
 )

 from langchain.schema import Document
-
+from llama_server import LLM

 class Params:
    """Class containing parameters for URL.
@ -171,7 +169,8 @@ def summarize(df_party, user_input):

    {texts}

-    Please make a very short summary of the standpoints of the {party} party in the EU parliament, in relation to the user search. Make note of the speaker and the dates for the speeches.
+    You are a journalist and are going to write a short summary of the standpoints of the {party} party in the EU parliament, in relation to the user search. 
+    Make note of the speaker and the dates for the speeches.
    
    Example 1:
    In 2020, Parliamentarian NN, ({party}) wanted to decrease the budget for the EU parliament. Later, 2022, she wanted to incrase the budget.
@ -185,11 +184,48 @@ def summarize(df_party, user_input):

    # Generate a summary of the party's standpoints.
    # Return the summaries dictionary.
-    system_prompt = "You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament."
+    
+    #TODO Have to understand sysmtem_prompt in llama_server
+    #system_prompt = "You are a journalist and have been asked to write a short summary of the standpoints of the party in the EU parliament." 

-    return ollama(
-        prompt=prompt, system_prompt=system_prompt, temperature=0.5, print_tokens=False
-    )
+    return llama.generate(prompt=prompt)
+
+
+def normalize_party_names(name):
+        """
+        Normalizes party names to the format used in the database.
+
+        Parameters:
+        name (str): The party name to be normalized.
+
+        Returns:
+        str: The normalized party name.
+        """
+
+        parties = {
+            "EPP": "EPP",
+            "PPE": "EPP",
+            "RE": "Renew",
+            "S-D": "S&D",
+            "S&D": "S&D",
+            "ID": "ID",
+            "ECR": "ECR",
+            "GUE/NGL": "GUE/NGL",
+            "The Left": "GUE/NGL",
+            "Greens/EFA": "Greens/EFA",
+            "G/EFA": "Greens/EFA",
+            "Verts/ALE": "Greens/EFA",
+            "NA": "NA",
+            "NULL": "NA",
+            None: "NA",
+            "-": "NA",
+            "Vacant": "NA",
+            "NI": "NA",
+            "Renew": "Renew"
+
+        }
+
+        return parties[name]


 def make_snippet(text, input_tokens, token_text_list, token_input_list):
@ -214,7 +250,7 @@ def make_snippet(text, input_tokens, token_text_list, token_input_list):
        snippet = []
        text_lower = text.lower()
        # Calculate snippet length in words.
-        snippet_length = 40 * int(8 / len(input_tokens))  # * Change to another value?
+        snippet_length = 40 * int(10 / len(input_tokens)+1)  # * Change to another value?

        # Loop through each input token.
        for token in input_tokens:
@ -635,6 +671,9 @@ partycodes = list(party_colors.keys())  # List of partycodes
 # Max hits returned by db.
 return_limit = 10000

+# Initialize LLM model.
+llama = LLM(temperature=0.5)
+
 # Ask for word to search for.
 user_input = st.text_input(
    " ",
@ -644,18 +683,17 @@ user_input = st.text_input(
    help='You can use asterix (*), minus (-), quotationmarks ("") and OR.',
 )

-
 if len(user_input) > 3:
    params.q = user_input

    # print(user_input.upper())
-    # print(ollama(prompt=f'''A user wants to search in a database containing debates in the European Parliament and have made the input below. Take that input and write three questions would generate a good result if used for quering a vector database. Answer with a python style list containing the three questions.
+    # print(llama.generate(prompt=f'''A user wants to search in a database containing debates in the European Parliament and have made the input below. Take that input and write three questions would generate a good result if used for quering a vector database. Answer with a python style list containing the three questions.

    # User input: {user_input}

    # Questions: '''))

-    try:  #! When in procution, uncomment this.
+    try:  #! When in prodution, uncomment this.
        user_input = user_input.replace("'", '"')
        input_tokens = re.findall(r'(?:"[^"]*"|\S)+', user_input)

@ -811,10 +849,15 @@ if len(user_input) > 3:
            st.session_state["hits"] = df.shape[0]
        else:
            if st.session_state["hits"] != df.shape[0]:
-                del st.session_state["df_excerpts"]
-                del st.session_state["excerpt_page"]
-                del st.session_state["text_next_page_button"]
+                if "df_excerpts" in st.session_state:
+                    del st.session_state["df_excerpts"]
+                if "excerpt_page" in st.session_state:
+                    del st.session_state["excerpt_page"]
+                if 'text_next_page_button' in st.session_state:
+                    del st.session_state["text_next_page_button"]
                del st.session_state["disable_next_page_button"]
+                if 'disable_next_page_button' in st.session_state:
+                    del st.session_state["disable_next_page_button"]
                st.session_state["hits"] = df.shape[0]

        ##! Show snippets.
@ -871,9 +914,8 @@ if len(user_input) > 3:
            party_talks = pd.DataFrame(df_["Party"].value_counts())
            party_labels = party_talks.index.to_list()
            fig, ax1 = plt.subplots()
-
-            total = party_talks["Party"].sum()
-            mentions = party_talks["Party"]  #!
+            total = party_talks["count"].sum()
+            mentions = party_talks["count"]  #!
            ax1.pie(
                mentions,
                labels=party_labels,
@ -904,7 +946,7 @@ if len(user_input) > 3:
            .mark_bar()
            .encode(
                x="Year",
-                y="Mentions",
+                y="count",
                color=alt.Color("color", scale=None),
                tooltip=["Party", "Mentions"],
            )