From cd53a49831b180452774bff4e98b66cad5e5dce2 Mon Sep 17 00:00:00 2001
From: lasseedfast <>
Date: Mon, 30 Oct 2023 12:03:30 +0100
Subject: [PATCH] Adding llama_server.py

---
 .gitignore      |  3 ++-
 llama_server.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 llama_server.py

diff --git a/.gitignore b/.gitignore
index 790aa09..b539cb7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@
 !streamlit_app_talking_ep.py
 !.gitignore
 !streamlit_info.py
-!notes.md
\ No newline at end of file
+!notes.md
+!llama_server.py
\ No newline at end of file
diff --git a/llama_server.py b/llama_server.py
new file mode 100644
index 0000000..74bf001
--- /dev/null
+++ b/llama_server.py
@@ -0,0 +1,48 @@
+import requests
+
+class LLM():
+    def __init__(self, system_prompt=None, temperature=0.8, max_new_tokens=1000):
+        """
+        Initializes the LLM class with the given parameters.
+
+        Args:
+            system_prompt (str, optional): The system prompt to use. Defaults to "Be precise and keep to the given information.".
+            temperature (float, optional): The temperature to use for generating new tokens. Defaults to 0.8.
+            max_new_tokens (int, optional): The maximum number of new tokens to generate. Defaults to 1000.
+        """
+        self.temperature = temperature
+        self.max_new_tokens = max_new_tokens
+        if system_prompt is None:
+            self.system_prompt = "Be precise and keep to the given information."
+        else:
+            self.system_prompt = system_prompt
+
+    def generate(self, prompt, repeat_penalty=1.2):
+        """
+        Generates new tokens based on the given prompt.
+
+        Args:
+            prompt (str): The prompt to use for generating new tokens.
+
+        Returns:
+            str: The generated tokens.
+        """
+        # Make a POST request to the API endpoint
+        headers = {"Content-Type": "application/json"}
+        url = "http://localhost:8080/completion"
+        json = {
+            "prompt": prompt,
+            #"system_prompt": self.system_prompt, #TODO https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#change-system-prompt-on-runtime
+            "temperature": self.temperature,
+            "n_predict": self.max_new_tokens,
+            "top_k": 30,
+            "repeat_penalty": repeat_penalty,
+        }
+
+        # Return the generated text, or None (after printing the body) on error
+        response = requests.post(url, headers=headers, json=json)
+        if not response.ok:
+            print(response.content)
+        else:
+            return response.json()['content']
+
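
A minimal usage sketch for the new class, assuming a llama.cpp server is already running and serving the /completion endpoint on http://localhost:8080 (the URL hard-coded in llama_server.py); the prompt text and parameter values below are illustrative only.

    from llama_server import LLM

    # Assumes a llama.cpp server is listening on http://localhost:8080/completion.
    llm = LLM(temperature=0.5, max_new_tokens=200)

    # generate() returns the 'content' field of the server's JSON response,
    # or None if the request failed, so guard before using the result.
    answer = llm.generate("Summarise the given notes in one sentence.", repeat_penalty=1.1)
    if answer is not None:
        print(answer)

Note that the system_prompt passed to the constructor is stored but not yet sent to the server; the corresponding request field is commented out with a TODO in generate().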