From cd53a49831b180452774bff4e98b66cad5e5dce2 Mon Sep 17 00:00:00 2001
From: lasseedfast <>
Date: Mon, 30 Oct 2023 12:03:30 +0100
Subject: [PATCH] Adding llama_server.py

---
 .gitignore      |  3 ++-
 llama_server.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 llama_server.py

diff --git a/.gitignore b/.gitignore
index 790aa09..b539cb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@
 !streamlit_app_talking_ep.py
 !.gitignore
 !streamlit_info.py
-!notes.md
\ No newline at end of file
+!notes.md
+!llama_server.py
\ No newline at end of file
diff --git a/llama_server.py b/llama_server.py
new file mode 100644
index 0000000..74bf001
--- /dev/null
+++ b/llama_server.py
@@ -0,0 +1,48 @@
+import requests
+
+class LLM():
+    def __init__(self, system_prompt=None, temperature=0.8, max_new_tokens=1000):
+        """
+        Initializes the LLM class with the given parameters.
+
+        Args:
+            system_prompt (str, optional): The system prompt to use. Defaults to "Be precise and keep to the given information.".
+            temperature (float, optional): The temperature to use for generating new tokens. Defaults to 0.8.
+            max_new_tokens (int, optional): The maximum number of new tokens to generate. Defaults to 1000.
+        """
+        self.temperature = temperature
+        self.max_new_tokens = max_new_tokens
+        if system_prompt is None:
+            self.system_prompt = "Be precise and keep to the given information."
+        else:
+            self.system_prompt = system_prompt
+
+    def generate(self, prompt, repeat_penalty=1.2):
+        """
+        Generates new tokens based on the given prompt.
+
+        Args:
+            prompt (str): The prompt to use for generating new tokens.
+
+        Returns:
+            str: The generated tokens.
+        """
+        # Make a POST request to the API endpoint
+        headers = {"Content-Type": "application/json"}
+        url = "http://localhost:8080/completion"
+        json = {
+            "prompt": prompt,
+            #"system_prompt": self.system_prompt, #TODO https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#change-system-prompt-on-runtime
+            "temperature": self.temperature,
+            "n_predict": self.max_new_tokens,
+            "top_k": 30,
+            "repeat_penalty": repeat_penalty,
+        }
+
+        # Return the generated text, or None (after printing the body) on error
+        response = requests.post(url, headers=headers, json=json)
+        if not response.ok:
+            print(response.content)
+        else:
+            return response.json()['content']
+
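
A minimal usage sketch for the new class, assuming a llama.cpp server is already running and serving the /completion endpoint on http://localhost:8080 (the URL hard-coded in llama_server.py); the prompt text and parameter values below are illustrative only.

    from llama_server import LLM

    # Assumes a llama.cpp server is listening on http://localhost:8080/completion.
    llm = LLM(temperature=0.5, max_new_tokens=200)

    # generate() returns the 'content' field of the server's JSON response,
    # or None if the request failed, so guard before using the result.
    answer = llm.generate("Summarise the given notes in one sentence.", repeat_penalty=1.1)
    if answer is not None:
        print(answer)

Note that the system_prompt passed to the constructor is stored but not yet sent to the server; the corresponding request field is commented out with a TODO in generate().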