- Implemented Pydantic models for article processing and summarization.
- Created `test_and_view.py` for testing LLM server document summarization.
- Developed `test_llm_server.py` for unit testing summarization functionality.
- Added `test_server.py` for additional testing of document and chunk summarization.
- Introduced `view_latest_results.py` to display the latest summaries from the LLM server.
- Established a structured plan for handling document chunks and their metadata.
- Enhanced error handling and user feedback in testing scripts.

Branch: main
parent 5ee1a062f1
commit 62b68c3717
35 changed files with 6481 additions and 2567 deletions
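As a rough illustration of the first bullet in the commit message, below is a minimal sketch of what Pydantic models for document chunks and summaries could look like. The class and field names are assumptions for illustration only, not the models actually added in this commit.

from typing import Optional

from pydantic import BaseModel, Field


class ChunkMetadata(BaseModel):
    # Hypothetical fields, mirroring the metadata keys used in the chat code below.
    arango_id: Optional[str] = None
    title: str = "No title"
    journal: Optional[str] = None
    published_date: Optional[str] = None


class DocumentChunk(BaseModel):
    document: str
    metadata: ChunkMetadata
    distance: Optional[float] = None


class ArticleSummary(BaseModel):
    title: str
    summary: str
    chunks: list[DocumentChunk] = Field(default_factory=list)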
@@ -1,800 +0,0 @@
||||
from datetime import datetime |
||||
import streamlit as st |
||||
from _base_class import StreamlitBaseClass, BaseClass |
||||
from _llm import LLM |
||||
from prompts import * |
||||
from colorprinter.print_color import * |
||||
from llm_tools import ToolRegistry |
||||
|
||||
class Chat(StreamlitBaseClass): |
||||
def __init__(self, username=None, **kwargs): |
||||
super().__init__(username=username, **kwargs) |
||||
self.name = kwargs.get("name", None) |
||||
self.chat_history = kwargs.get("chat_history", []) |
||||
|
||||
|
||||
def add_message(self, role, content): |
||||
self.chat_history.append( |
||||
{ |
||||
"role": role, |
||||
"content": content.strip().strip('"'), |
||||
"role_type": self.role, |
||||
} |
||||
) |
||||
|
||||
def to_dict(self): |
||||
return { |
||||
"_key": self._key, |
||||
"name": self.name, |
||||
"chat_history": self.chat_history, |
||||
"role": self.role, |
||||
"username": self.username, |
||||
} |
||||
|
||||
def update_in_arango(self): |
||||
self.last_updated = datetime.now().isoformat() |
||||
self.user_arango.db.collection("chats").insert( |
||||
self.to_dict(), overwrite=True, overwrite_mode="update" |
||||
) |
||||
|
||||
def set_name(self, user_input): |
||||
llm = LLM( |
||||
model="small", |
||||
max_length_answer=50, |
||||
temperature=0.4, |
||||
system_message="You are a chatbot who will be chatting with a user", |
||||
) |
||||
prompt = ( |
||||
f'Give a short name to the chat based on this user input: "{user_input}" ' |
||||
"No more than 30 characters. Answer ONLY with the name of the chat." |
||||
) |
||||
name = llm.generate(prompt).content.strip('"') |
||||
name = f'{name} - {datetime.now().strftime("%B %d")}' |
||||
existing_chat = self.user_arango.db.aql.execute( |
||||
f'FOR doc IN chats FILTER doc.name == "{name}" RETURN doc', count=True |
||||
) |
||||
if existing_chat.count() > 0: |
||||
name = f'{name} ({datetime.now().strftime("%H:%M")})' |
||||
name += f" - [{self.role}]" |
||||
self.name = name |
||||
return name |
||||
|
||||
@classmethod |
||||
def from_dict(cls, data): |
||||
return cls( |
||||
username=data.get("username"), |
||||
name=data.get("name"), |
||||
chat_history=data.get("chat_history", []), |
||||
role=data.get("role", "Research Assistant"), |
||||
_key=data.get("_key"), |
||||
) |
||||
|
||||
def chat_history2bot(self, n_messages: int = None, remove_system: bool = False): |
||||
history = [ |
||||
{"role": m["role"], "content": m["content"]} for m in self.chat_history |
||||
] |
||||
if n_messages and len(history) > n_messages: |
||||
history = history[-n_messages:] |
||||
if history and (
(history[0]["role"] == "system" and remove_system)
or history[0]["role"] == "assistant"
):
history = history[1:]
||||
return history |
||||
|
||||
|
||||
class Bot(BaseClass): |
||||
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs): |
||||
super().__init__(username=username, **kwargs) |
||||
|
||||
# Use the passed in chat or create a new Chat |
||||
self.chat = chat if chat else Chat(username=username, role="Research Assistant") |
||||
print_yellow(f"Chat:", chat, type(chat)) |
||||
# Store or set up project/collection if available |
||||
self.project = kwargs.get("project", None) |
||||
self.collection = kwargs.get("collection", None) |
||||
if self.collection and not isinstance(self.collection, list): |
||||
self.collection = [self.collection] |
||||
|
||||
# Load articles in the collections |
||||
self.arango_ids = [] |
||||
if self.collection: |
||||
for c in self.collection: |
||||
for _id in self.user_arango.db.aql.execute( |
||||
""" |
||||
FOR doc IN article_collections |
||||
FILTER doc.name == @collection |
||||
FOR article IN doc.articles |
||||
RETURN article._id |
||||
""", |
||||
bind_vars={"collection": c}, |
||||
): |
||||
self.arango_ids.append(_id) |
||||
|
||||
# A standard LLM for normal chat |
||||
self.chatbot = LLM(messages=self.chat.chat_history2bot()) |
||||
# A helper bot for generating queries or short prompts |
||||
self.helperbot = LLM( |
||||
temperature=0, |
||||
model="small", |
||||
max_length_answer=500, |
||||
system_message=get_query_builder_system_message(), |
||||
messages=self.chat.chat_history2bot(n_messages=4, remove_system=True), |
||||
) |
||||
# A specialized LLM picking which tool to use |
||||
self.toolbot = LLM( |
||||
temperature=0, |
||||
system_message=""" |
||||
You are an assistant bot helping an answering bot to answer a user's messages. |
||||
Your task is to choose one or multiple tools that will help the answering bot to provide the user with the best possible answer. |
||||
You should NEVER directly answer the user. You MUST choose a tool. |
||||
""", |
||||
chat=False, |
||||
model="small", |
||||
) |
||||
|
||||
# Load or register the passed-in tools |
||||
if tools: |
||||
self.tools = ToolRegistry.get_tools(tools=tools) |
||||
else: |
||||
self.tools = ToolRegistry.get_tools() |
||||
|
||||
# Store other kwargs |
||||
for arg in kwargs: |
||||
setattr(self, arg, kwargs[arg]) |
||||
|
||||
|
||||
|
||||
|
||||
def get_chunks( |
||||
self, |
||||
user_input, |
||||
collections=["sci_articles", "other_documents"], |
||||
n_results=7, |
||||
n_sources=4, |
||||
filter=True, |
||||
): |
||||
# Basic version without Streamlit calls |
||||
query = self.helperbot.generate( |
||||
get_generate_vector_query_prompt(user_input, self.chat.role) |
||||
).content.strip('"') |
||||
|
||||
combined_chunks = [] |
||||
if collections: |
||||
for collection in collections: |
||||
where_filter = {"_id": {"$in": self.arango_ids}} if filter else {} |
||||
chunks = self.get_chromadb().query( |
||||
query=query, |
||||
collection=collection, |
||||
n_results=n_results, |
||||
n_sources=n_sources, |
||||
where=where_filter, |
||||
max_retries=3, |
||||
) |
||||
for doc, meta, dist in zip( |
||||
chunks["documents"][0], |
||||
chunks["metadatas"][0], |
||||
chunks["distances"][0], |
||||
): |
||||
combined_chunks.append( |
||||
{"document": doc, "metadata": meta, "distance": dist} |
||||
) |
||||
combined_chunks.sort(key=lambda x: x["distance"]) |
||||
|
||||
# Keep the best chunks according to n_sources |
||||
sources = set() |
||||
closest_chunks = [] |
||||
for chunk in combined_chunks: |
||||
source_id = chunk["metadata"].get("_id", "no_id") |
||||
if source_id not in sources: |
||||
sources.add(source_id) |
||||
closest_chunks.append(chunk) |
||||
if len(sources) >= n_sources: |
||||
break |
||||
if len(closest_chunks) < n_results: |
||||
remaining_chunks = [ |
||||
c for c in combined_chunks if c not in closest_chunks |
||||
] |
||||
closest_chunks.extend(remaining_chunks[: n_results - len(closest_chunks)]) |
||||
|
||||
# Now fetch real metadata from Arango |
||||
for chunk in closest_chunks: |
||||
_id = chunk["metadata"].get("_id") |
||||
if not _id: |
||||
continue |
||||
if _id.startswith("sci_articles"): |
||||
arango_doc = self.base_arango.db.document(_id) |
||||
else: |
||||
arango_doc = self.user_arango.db.document(_id) |
||||
if arango_doc: |
||||
arango_metadata = arango_doc.get("metadata", {}) |
||||
# Possibly merge notes |
||||
if "user_notes" in arango_doc: |
||||
arango_metadata["user_notes"] = arango_doc["user_notes"] |
||||
chunk["metadata"] = arango_metadata |
||||
|
||||
# Group by article title |
||||
grouped_chunks = {} |
||||
article_number = 1 |
||||
for chunk in closest_chunks: |
||||
title = chunk["metadata"].get("title", "No title") |
||||
chunk["article_number"] = article_number |
||||
if title not in grouped_chunks: |
||||
grouped_chunks[title] = { |
||||
"article_number": article_number, |
||||
"chunks": [], |
||||
} |
||||
article_number += 1 |
||||
grouped_chunks[title]["chunks"].append(chunk) |
||||
return grouped_chunks |
||||
|
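# Hedged sketch (not in the original file): the shape of the dict returned by get_chunks,
# inferred from the grouping code above. The values shown are illustrative only.
# {
#     "Some article title": {
#         "article_number": 1,
#         "chunks": [
#             {"document": "chunk text ...",
#              "metadata": {"title": "Some article title", "journal": "...", "user_notes": "..."},
#              "distance": 0.12,
#              "article_number": 1},
#         ],
#     },
# }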
||||
def answer_tool_call(self, response, user_input): |
||||
bot_responses = [] |
||||
# This method returns / stores responses (no Streamlit calls) |
||||
if not response.get("tool_calls"): |
||||
return "" |
||||
|
||||
for tool in response.get("tool_calls"): |
||||
function_name = tool.function.get('name') |
||||
arguments = tool.function.arguments |
||||
arguments["query"] = user_input |
||||
|
||||
if hasattr(self, function_name): |
||||
if function_name in [ |
||||
"fetch_other_documents_tool", |
||||
"fetch_science_articles_tool", |
||||
"fetch_science_articles_and_other_documents_tool", |
||||
]: |
||||
chunks = getattr(self, function_name)(**arguments) |
||||
bot_responses.append( |
||||
self.generate_from_chunks(user_input, chunks).strip('"') |
||||
) |
||||
elif function_name == "fetch_notes_tool": |
||||
notes = getattr(self, function_name)() |
||||
bot_responses.append( |
||||
self.generate_from_notes(user_input, notes).strip('"') |
||||
) |
||||
elif function_name == "conversational_response_tool": |
||||
bot_responses.append( |
||||
getattr(self, function_name)(user_input).strip('"') |
||||
) |
||||
return "\n\n".join(bot_responses) |
||||
|
||||
def process_user_input(self, user_input, content_attachment=None): |
||||
# Add user message |
||||
self.chat.add_message("user", user_input) |
||||
|
||||
if not content_attachment: |
||||
prompt = get_tools_prompt(user_input) |
||||
response = self.toolbot.generate(prompt, tools=self.tools, stream=False) |
||||
if response.get("tool_calls"): |
||||
bot_response = self.answer_tool_call(response, user_input) |
||||
else: |
||||
# Just respond directly |
||||
bot_response = response.content.strip('"') |
||||
else: |
||||
# If there's an attachment, do something minimal |
||||
bot_response = "Content attachment received (Base Bot)." |
||||
|
||||
# Add assistant message |
||||
if self.chat.chat_history[-1]["role"] != "assistant": |
||||
self.chat.add_message("assistant", bot_response) |
||||
|
||||
# Update in Arango |
||||
self.chat.update_in_arango() |
||||
return bot_response |
||||
|
||||
def generate_from_notes(self, user_input, notes): |
||||
# No Streamlit calls |
||||
notes_string = "" |
||||
for note in notes: |
||||
notes_string += f"\n# {note.get('title','No title')}\n{note.get('content','')}\n---\n" |
||||
prompt = get_chat_prompt(user_input, content_string=notes_string, role=self.chat.role) |
||||
return self.chatbot.generate(prompt, stream=True) |
||||
|
||||
def generate_from_chunks(self, user_input, chunks): |
||||
# No Streamlit calls |
||||
chunks_string = "" |
||||
for title, group in chunks.items(): |
||||
user_notes_string = "" |
||||
if "user_notes" in group["chunks"][0]["metadata"]: |
||||
notes = group["chunks"][0]["metadata"]["user_notes"] |
||||
user_notes_string = f'\n\nUser notes:\n"""\n{notes}\n"""\n\n' |
||||
docs = "\n(...)\n".join([c["document"] for c in group["chunks"]]) |
||||
chunks_string += ( |
||||
f"\n# {title}\n## Article #{group['article_number']}\n{user_notes_string}{docs}\n---\n" |
||||
) |
||||
prompt = get_chat_prompt(user_input, content_string=chunks_string, role=self.chat.role) |
||||
return self.chatbot.generate(prompt, stream=True) |
||||
|
||||
def run(self): |
||||
# Base Bot has no Streamlit run loop |
||||
pass |
||||
|
||||
def get_notes(self): |
||||
# Minimal note retrieval |
||||
notes = self.user_arango.db.aql.execute( |
||||
f'FOR doc IN notes FILTER doc.project == "{self.project.name if self.project else ""}" RETURN doc' |
||||
) |
||||
return list(notes) |
||||
|
||||
@ToolRegistry.register |
||||
def fetch_science_articles_tool(self, query: str, n_documents: int): |
||||
""" |
||||
"Fetches information from scientific articles. Use this tool when the user is looking for information from scientific articles." |
||||
|
||||
Parameters: |
||||
query (str): The search query to find relevant scientific articles. |
||||
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 3, Max: 10. |
||||
|
||||
Returns: |
||||
list: A list of chunks containing information from the fetched scientific articles. |
||||
""" |
||||
print_purple('Query:', query) |
||||
|
||||
n_documents = int(n_documents) |
||||
if n_documents < 3: |
||||
n_documents = 3 |
||||
elif n_documents > 10: |
||||
n_documents = 10 |
||||
return self.get_chunks( |
||||
query, collections=["sci_articles"], n_results=n_documents |
||||
) |
||||
|
||||
@ToolRegistry.register |
||||
def fetch_other_documents_tool(self, query: str, n_documents: int): |
||||
""" |
||||
Fetches information from other documents based on the user's query. |
||||
|
||||
This method retrieves information from various types of documents such as reports, news articles, and other texts. It should be used only when it is clear that the user is not seeking scientific articles. |
||||
|
||||
Args: |
||||
query (str): The search query provided by the user. |
||||
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 2, Max: 10. |
||||
|
||||
Returns: |
||||
list: A list of document chunks that match the query. |
||||
""" |
||||
assert isinstance(self, Bot), "The first argument must be a Bot object." |
||||
n_documents = int(n_documents) |
||||
if n_documents < 2: |
||||
n_documents = 2 |
||||
elif n_documents > 10: |
||||
n_documents = 10 |
||||
return self.get_chunks( |
||||
query, |
||||
collections=[f"{self.username}__other_documents"], |
||||
n_results=n_documents, |
||||
) |
||||
|
||||
@ToolRegistry.register |
||||
def fetch_science_articles_and_other_documents_tool( |
||||
self, query: str, n_documents: int |
||||
): |
||||
""" |
||||
Fetches information from both scientific articles and other documents. |
||||
|
||||
This method is often used when the user hasn't specified what kind of sources they are interested in. |
||||
|
||||
Args: |
||||
query (str): The search query to fetch information for. |
||||
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 3, Max: 10. |
||||
|
||||
Returns: |
||||
list: A list of document chunks that match the search query. |
||||
""" |
||||
assert isinstance(self, Bot), "The first argument must be a Bot object." |
||||
n_documents = int(n_documents) |
||||
if n_documents < 3: |
||||
n_documents = 3 |
||||
elif n_documents > 10: |
||||
n_documents = 10 |
||||
return self.get_chunks( |
||||
query, |
||||
collections=["sci_articles", f"{self.username}__other_documents"], |
||||
n_results=n_documents, |
||||
) |
||||
|
||||
@ToolRegistry.register |
||||
def fetch_notes_tool(bot): |
||||
""" |
||||
Fetches information from the project notes when you as an editor need context from the project notes to understand other information. ONLY use this together with other tools! No arguments needed. |
||||
|
||||
Returns: |
||||
list: A list of notes. |
||||
""" |
||||
assert isinstance(bot, Bot), "The first argument must be a Bot object." |
||||
return bot.get_notes() |
||||
|
||||
@ToolRegistry.register |
||||
def conversational_response_tool(self, query: str): |
||||
""" |
||||
Generate a conversational response to a user's query. |
||||
|
||||
This method is designed to provide a short and conversational response |
||||
without fetching additional data. It should be used only when it is clear |
||||
that the user is engaging in small talk (like saying 'hi') and not seeking detailed information. |
||||
|
||||
Args: |
||||
query (str): The user's message to which the bot should respond. |
||||
|
||||
Returns: |
||||
str: The generated conversational response. |
||||
""" |
||||
query = f""" |
||||
User message: "{query}". |
||||
Make your answer short and conversational. |
||||
This is perhaps not a conversation about a journalistic project, so try not to be too informative. |
||||
Don't answer with anything you're not sure of! |
||||
""" |
||||
|
||||
result = ( |
||||
self.chatbot.generate(query, stream=True) |
||||
if self.chatbot |
||||
else self.llm.generate(query, stream=True) |
||||
) |
||||
return result |
||||
|
||||
class StreamlitBot(Bot): |
||||
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
||||
print_purple("StreamlitBot init chat:", chat) |
||||
super().__init__(username=username, chat=chat, tools=tools, **kwargs) |
||||
|
||||
# For Streamlit, we can override or add attributes |
||||
if 'llm_chosen_backend' not in st.session_state: |
||||
st.session_state['llm_chosen_backend'] = None |
||||
|
||||
self.chatbot.chosen_backend = st.session_state['llm_chosen_backend'] |
||||
if not st.session_state['llm_chosen_backend']: |
||||
st.session_state['llm_chosen_backend'] = self.chatbot.chosen_backend |
||||
|
||||
def run(self): |
||||
# Example Streamlit run loop |
||||
self.chat.show_chat_history() |
||||
if user_input := st.chat_input("Write your message here...", accept_file=True): |
||||
text_input = user_input.text.replace('"""', "---") |
||||
if len(user_input.files) > 1: |
||||
st.error("Please upload only one file at a time.") |
||||
return |
||||
attached_file = user_input.files[0] if user_input.files else None |
||||
|
||||
content_attachment = None |
||||
if attached_file: |
||||
if attached_file.type == "application/pdf": |
||||
import fitz |
||||
pdf_document = fitz.open(stream=attached_file.read(), filetype="pdf") |
||||
pdf_text = "" |
||||
for page_num in range(len(pdf_document)): |
||||
page = pdf_document.load_page(page_num) |
||||
pdf_text += page.get_text() |
||||
content_attachment = pdf_text |
||||
elif attached_file.type in ["image/png", "image/jpeg"]: |
||||
self.chat.message_attachments = "image" |
||||
content_attachment = attached_file.read() |
||||
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")): |
||||
st.image(content_attachment) |
||||
|
||||
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")): |
||||
st.write(text_input) |
||||
|
||||
if not self.chat.name: |
||||
self.chat.set_name(text_input) |
||||
self.chat.last_updated = datetime.now().isoformat() |
||||
self.chat.saved = False |
||||
self.user_arango.db.collection("chats").insert( |
||||
self.chat.to_dict(), overwrite=True, overwrite_mode="update" |
||||
) |
||||
|
||||
self.process_user_input(text_input, content_attachment) |
||||
|
||||
def process_user_input(self, user_input, content_attachment=None): |
||||
# We override to show messages in Streamlit instead of just storing |
||||
self.chat.add_message("user", user_input) |
||||
if not content_attachment: |
||||
prompt = get_tools_prompt(user_input) |
||||
response = self.toolbot.generate(prompt, tools=self.tools, stream=False) |
||||
if response.get("tool_calls"): |
||||
bot_response = self.answer_tool_call(response, user_input) |
||||
else: |
||||
bot_response = response.content.strip('"') |
||||
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")): |
||||
st.write(bot_response) |
||||
else: |
||||
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")): |
||||
with st.spinner("Reading the content..."): |
||||
if self.chat.message_attachments == "image": |
||||
prompt = get_chat_prompt(user_input, role=self.chat.role, image_attachment=True) |
||||
bot_resp = self.chatbot.generate(prompt, stream=False, images=[content_attachment], model="vision") |
||||
st.write(bot_resp) |
||||
bot_response = bot_resp |
||||
else: |
||||
prompt = get_chat_prompt(user_input, content_attachment=content_attachment, role=self.chat.role) |
||||
response = self.chatbot.generate(prompt, stream=True) |
||||
bot_response = st.write_stream(response) |
||||
|
||||
if self.chat.chat_history[-1]["role"] != "assistant": |
||||
self.chat.add_message("assistant", bot_response) |
||||
|
||||
self.chat.update_in_arango() |
||||
|
||||
def answer_tool_call(self, response, user_input): |
||||
bot_responses = [] |
||||
for tool in response.get("tool_calls", []): |
||||
function_name = tool.function.get('name') |
||||
arguments = tool.function.arguments |
||||
arguments["query"] = user_input |
||||
|
||||
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")): |
||||
if function_name in [ |
||||
"fetch_other_documents_tool", |
||||
"fetch_science_articles_tool", |
||||
"fetch_science_articles_and_other_documents_tool", |
||||
]: |
||||
chunks = getattr(self, function_name)(**arguments) |
||||
response_text = self.generate_from_chunks(user_input, chunks) |
||||
bot_response = st.write_stream(response_text).strip('"') |
||||
if chunks: |
||||
sources = "###### Sources:\n" |
||||
for title, group in chunks.items(): |
||||
j = group["chunks"][0]["metadata"].get("journal", "No Journal") |
||||
d = group["chunks"][0]["metadata"].get("published_date", "No Date") |
||||
sources += f"[{group['article_number']}] **{title}** :gray[{j} ({d})]\n" |
||||
st.markdown(sources) |
||||
bot_response += f"\n\n{sources}" |
||||
bot_responses.append(bot_response) |
||||
|
||||
elif function_name == "fetch_notes_tool": |
||||
notes = getattr(self, function_name)() |
||||
response_text = self.generate_from_notes(user_input, notes) |
||||
bot_responses.append(st.write_stream(response_text).strip('"')) |
||||
|
||||
elif function_name == "conversational_response_tool": |
||||
response_text = getattr(self, function_name)(user_input) |
||||
bot_responses.append(st.write_stream(response_text).strip('"')) |
||||
|
||||
return "\n\n".join(bot_responses) |
||||
|
||||
def generate_from_notes(self, user_input, notes): |
||||
with st.spinner("Reading project notes..."): |
||||
return super().generate_from_notes(user_input, notes) |
||||
|
||||
def generate_from_chunks(self, user_input, chunks): |
||||
# For reading articles with a spinner |
||||
magazines = set() |
||||
for group in chunks.values(): |
||||
j = group["chunks"][0]["metadata"].get("journal", "No Journal") |
||||
magazines.add(f"*{j}*") |
||||
s = ( |
||||
f"Reading articles from {', '.join(list(magazines)[:-1])} and {list(magazines)[-1]}..." |
||||
if len(magazines) > 1 |
||||
else "Reading articles..." |
||||
) |
||||
with st.spinner(s): |
||||
return super().generate_from_chunks(user_input, chunks) |
||||
|
||||
def sidebar_content(self): |
||||
with st.sidebar: |
||||
st.write("---") |
||||
st.markdown(f'#### {self.chat.name if self.chat.name else ""}') |
||||
st.button("Delete this chat", on_click=self.delete_chat) |
||||
|
||||
def delete_chat(self): |
||||
self.user_arango.db.collection("chats").delete_match( |
||||
filters={"name": self.chat.name} |
||||
) |
||||
self.chat = Chat() |
||||
|
||||
def get_notes(self): |
||||
# We can show a spinner or messages too |
||||
with st.spinner("Fetching notes..."): |
||||
return super().get_notes() |
||||
|
||||
|
||||
class EditorBot(StreamlitBot):
||||
def __init__(self, chat: Chat, username: str, **kwargs): |
||||
print_blue("EditorBot init chat:", chat) |
||||
super().__init__(chat=chat, username=username, **kwargs) |
||||
self.role = "Editor" |
||||
self.tools = ToolRegistry.get_tools() |
||||
self.chatbot = LLM( |
||||
system_message=get_editor_prompt(kwargs.get("project")), |
||||
messages=self.chat.chat_history2bot(), |
||||
chosen_backend=kwargs.get("chosen_backend"), |
||||
) |
||||
|
||||
|
||||
class ResearchAssistantBot(StreamlitBot):
||||
def __init__(self, chat: Chat, username: str, **kwargs): |
||||
super().__init__(chat=chat, username=username, **kwargs) |
||||
self.role = "Research Assistant" |
||||
self.chatbot = LLM( |
||||
system_message=get_assistant_prompt(), |
||||
temperature=0.1, |
||||
messages=self.chat.chat_history2bot(), |
||||
) |
||||
self.tools = [ |
||||
self.fetch_science_articles_tool, |
||||
self.fetch_science_articles_and_other_documents_tool, |
||||
] |
||||
|
||||
|
||||
class PodBot(StreamlitBot):
||||
"""Two LLM agents construct a conversation using material from science articles.""" |
||||
|
||||
def __init__( |
||||
self, |
||||
chat: Chat, |
||||
subject: str, |
||||
username: str, |
||||
instructions: str = None, |
||||
**kwargs, |
||||
): |
||||
super().__init__(chat=chat, username=username, **kwargs) |
||||
self.subject = subject |
||||
self.instructions = instructions |
||||
self.guest_name = kwargs.get("name_guest", "Merit") |
||||
self.hostbot = HostBot( |
||||
Chat(username=self.username, role="Host"), |
||||
subject, |
||||
username, |
||||
instructions=instructions, |
||||
**kwargs, |
||||
) |
||||
self.guestbot = GuestBot( |
||||
Chat(username=self.username, role="Guest"), |
||||
subject, |
||||
username, |
||||
name_guest=self.guest_name, |
||||
**kwargs, |
||||
) |
||||
|
||||
def run(self): |
||||
|
||||
notes = self.get_notes() |
||||
notes_string = "" |
||||
if self.instructions: |
||||
instructions_string = f''' |
||||
These are the instructions for the podcast from the producer: |
||||
""" |
||||
{self.instructions} |
||||
""" |
||||
''' |
||||
else: |
||||
instructions_string = "" |
||||
|
||||
for note in notes: |
||||
notes_string += f"\n# {note['title']}\n{note['content']}\n---\n" |
||||
a = f'''You will conduct a podcast interview with {self.guest_name}, an expert on "{self.subject}".
||||
{instructions_string} |
||||
Below are notes on the subject that you can use to ask relevant questions: |
||||
""" |
||||
{notes_string} |
||||
""" |
||||
Say hello to the expert and start the interview. Remember to keep the interview to the subject of {self.subject} throughout the conversation. |
||||
''' |
||||
|
||||
# Stop button for the podcast |
||||
with st.sidebar: |
||||
stop = st.button("Stop podcast", on_click=self.stop_podcast) |
||||
|
||||
while st.session_state["make_podcast"]: |
||||
# Stop the podcast if there are more than 14 messages in the chat |
||||
self.chat.show_chat_history() |
||||
if len(self.chat.chat_history) == 14: |
||||
result = self.hostbot.generate( |
||||
"The interview has ended. Say thank you to the expert and end the conversation." |
||||
) |
||||
self.chat.add_message("Host", result) |
||||
with st.chat_message( |
||||
"assistant", avatar=self.chat.get_avatar(role="assistant") |
||||
): |
||||
st.write(result.strip('"')) |
||||
st.stop() |
||||
|
||||
_q = self.hostbot.toolbot.generate( |
||||
query=f"{self.guest_name} has answered: {a}. You have to choose a tool to help the host continue the interview.", |
||||
tools=self.hostbot.tools, |
||||
temperature=0.6, |
||||
stream=False, |
||||
) |
||||
if "tool_calls" in _q: |
||||
q = self.hostbot.answer_tool_call(_q, a) |
||||
else: |
||||
q = _q |
||||
|
||||
self.chat.add_message("Host", q) |
||||
|
||||
_a = self.guestbot.toolbot.generate( |
||||
f'The podcast host has asked: "{q}" Choose a tool to help the expert answer with relevant facts and information.', |
||||
tools=self.guestbot.tools, |
||||
) |
||||
if "tool_calls" in _a: |
||||
print_yellow("Tool call response (guest)", _a) |
||||
print_yellow(self.guestbot.chat.role) |
||||
a = self.guestbot.answer_tool_call(_a, q) |
||||
else: |
||||
a = _a |
||||
self.chat.add_message("Guest", a) |
||||
|
||||
self.update_session_state() |
||||
|
||||
def stop_podcast(self): |
||||
st.session_state["make_podcast"] = False |
||||
self.update_session_state() |
||||
self.chat.show_chat_history() |
||||
|
||||
|
||||
class HostBot(StreamlitBot):
||||
def __init__( |
||||
self, chat: Chat, subject: str, username: str, instructions: str, **kwargs |
||||
): |
||||
super().__init__(chat=chat, username=username, **kwargs) |
||||
self.chat.role = kwargs.get("role", "Host") |
||||
self.tools = ToolRegistry.get_tools( |
||||
tools=[ |
||||
self.fetch_notes_tool, |
||||
self.conversational_response_tool, |
||||
# "fetch_other_documents", #TODO Should this be included? |
||||
] |
||||
) |
||||
self.instructions = instructions |
||||
self.llm = LLM( |
||||
system_message=f''' |
||||
You are the host of a podcast and an expert on {subject}. You will ask one question at a time about the subject, and then wait for the guest to answer. |
||||
Don't ask the guest to talk about herself/himself, only about the subject. |
||||
Make your questions short and clear, only if necessary add a brief context to the question. |
||||
These are the instructions for the podcast from the producer: |
||||
""" |
||||
{self.instructions} |
||||
""" |
||||
If the expert's answer is complicated, try to make a very brief summary of it for the audience to understand. You can also ask follow-up questions to clarify the answer, or ask for examples.
||||
''', |
||||
messages=self.chat.chat_history2bot() |
||||
) |
||||
self.toolbot = LLM( |
||||
temperature=0, |
||||
system_message=""" |
||||
You are assisting a podcast host in asking questions to an expert. |
||||
Choose one or many tools to use in order to assist the host in asking relevant questions. |
||||
Often "conversational_response_tool" is enough, but sometimes project notes are needed. |
||||
Make sure to read the description of the tools carefully!""", |
||||
chat=False, |
||||
model="small", |
||||
) |
||||
|
||||
def generate(self, query): |
||||
return self.llm.generate(query) |
||||
|
||||
|
||||
class GuestBot(StreamlitBot):
||||
def __init__(self, chat: Chat, subject: str, username: str, **kwargs): |
||||
super().__init__(chat=chat, username=username, **kwargs) |
||||
self.chat.role = kwargs.get("role", "Guest") |
||||
self.tools = ToolRegistry.get_tools( |
||||
tools=[ |
||||
self.fetch_notes_tool, |
||||
self.fetch_science_articles_tool, |
||||
] |
||||
) |
||||
|
||||
self.llm = LLM( |
||||
system_message=f""" |
||||
You are {kwargs.get('name', 'Merit')}, an expert on {subject}. |
||||
Today you are a guest in a podcast about {subject}. A host will ask you questions about the subject and you will answer by using scientific facts and information. |
||||
When answering, don't say things like "based on the documents" or alike, as neither the host nor the audience can see the documents. Act just as if you were talking to someone in a conversation. |
||||
Try to be concise when answering, and remember that the audience of the podcast is not expert on the subject, so don't complicate things too much. |
||||
It's very important that you answer in a "spoken" way, as if you were talking to someone in a conversation. That means you should avoid using scientific jargon and complex terms, too many figures or abstract concepts. |
||||
Lists are also not recommended; instead use "for the first reason", "secondly", etc.
You can use "..." to indicate a pause and "-" to indicate a break in the sentence, as if you were speaking.
||||
""", |
||||
messages=self.chat.chat_history2bot() |
||||
) |
||||
self.toolbot = LLM( |
||||
temperature=0, |
||||
system_message=f"You are an assistant to an expert on {subject}. Choose one or many tools to use in order to assist the expert in answering questions. Make sure to read the description of the tools carefully.", |
||||
chat=False, |
||||
model="small", |
||||
) |
||||
|
||||
def generate(self, query): |
||||
return self.llm.generate(query) |
||||
@@ -1,574 +0,0 @@
||||
import os |
||||
import base64 |
||||
import re |
||||
from typing import Literal, Optional |
||||
import requests |
||||
import tiktoken |
||||
from ollama import ( |
||||
Client, |
||||
AsyncClient, |
||||
ResponseError, |
||||
ChatResponse, |
||||
Tool, |
||||
Options, |
||||
) |
||||
|
||||
import env_manager |
||||
from colorprinter.print_color import * |
||||
|
||||
env_manager.set_env() |
||||
|
||||
tokenizer = tiktoken.get_encoding("cl100k_base") |
||||
|
||||
|
||||
class LLM: |
||||
""" |
||||
LLM class for interacting with an instance of Ollama. |
||||
|
||||
Attributes: |
||||
model (str): The model to be used for response generation. |
||||
system_message (str): The system message to be used in the chat. |
||||
options (dict): Options for the model, such as temperature. |
||||
messages (list): List of messages in the chat. |
||||
max_length_answer (int): Maximum length of the generated answer. |
||||
chat (bool): Whether the chat mode is enabled. |
||||
chosen_backend (str): The chosen backend server for the API. |
||||
client (Client): The client for synchronous API calls. |
||||
async_client (AsyncClient): The client for asynchronous API calls. |
||||
tools (list): List of tools to be used in generating the response. |
||||
|
||||
Methods: |
||||
__init__(self, system_message, temperature, model, max_length_answer, messages, chat, chosen_backend): |
||||
Initializes the LLM class with the provided parameters. |
||||
|
||||
get_model(self, model_alias): |
||||
Retrieves the model name based on the provided alias. |
||||
|
||||
count_tokens(self): |
||||
Counts the number of tokens in the messages. |
||||
|
||||
get_least_conn_server(self): |
||||
Retrieves the least connected server from the backend. |
||||
|
||||
generate(self, query, user_input, context, stream, tools, images, model, temperature): |
||||
Generates a response based on the provided query and options. |
||||
|
||||
make_summary(self, text): |
||||
Generates a summary of the provided text. |
||||
|
||||
read_stream(self, response): |
||||
Handles streaming responses. |
||||
|
||||
async_generate(self, query, user_input, context, stream, tools, images, model, temperature): |
||||
Asynchronously generates a response based on the provided query and options. |
||||
|
||||
prepare_images(self, images, message): |
||||
""" |
||||
|
||||
def __init__( |
||||
self, |
||||
system_message: str = "You are an assistant.", |
||||
temperature: float = 0.01, |
||||
model: Optional[ |
||||
Literal["small", "standard", "vision", "reasoning", "tools"] |
||||
] = "standard", |
||||
max_length_answer: int = 4096, |
||||
messages: list[dict] = None, |
||||
chat: bool = True, |
||||
chosen_backend: str = None, |
||||
tools: list = None, |
||||
) -> None: |
||||
""" |
||||
Initialize the assistant with the given parameters. |
||||
|
||||
Args: |
||||
system_message (str): The initial system message for the assistant. Defaults to "You are an assistant.". |
||||
temperature (float): The temperature setting for the model, affecting randomness. Defaults to 0.01. |
||||
model (Optional[Literal["small", "standard", "vision", "reasoning"]]): The model type to use. Defaults to "standard". |
||||
max_length_answer (int): The maximum length of the generated answer. Defaults to 4096. |
||||
messages (list[dict], optional): A list of initial messages. Defaults to None. |
||||
chat (bool): Whether the assistant is in chat mode. Defaults to True. |
||||
chosen_backend (str, optional): The backend server to use. If not provided, the least connected server is chosen. |
||||
|
||||
Returns: |
||||
None |
||||
""" |
||||
|
||||
self.model = self.get_model(model) |
||||
self.call_model = self.model  # Updated on each call to record which model was actually used
||||
self.system_message = system_message |
||||
self.options = {"temperature": temperature} |
||||
self.messages = messages or [{"role": "system", "content": self.system_message}] |
||||
self.max_length_answer = max_length_answer |
||||
self.chat = chat |
||||
|
||||
if not chosen_backend: |
||||
chosen_backend = self.get_least_conn_server() |
||||
self.chosen_backend = chosen_backend |
||||
|
||||
|
||||
headers = { |
||||
"Authorization": f"Basic {self.get_credentials()}", |
||||
"X-Chosen-Backend": self.chosen_backend, |
||||
} |
||||
self.host_url = os.getenv("LLM_API_URL").removesuffix("/api/chat/")  # remove the path suffix; rstrip() would strip a character set, not the suffix
self.host_url = 'http://192.168.1.12:3300' #! Change back when possible
||||
self.client: Client = Client(host=self.host_url, headers=headers, timeout=120) |
||||
self.async_client: AsyncClient = AsyncClient() |
||||
|
||||
def get_credentials(self): |
||||
# Initialize the client with the host and default headers |
||||
credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}" |
||||
return base64.b64encode(credentials.encode()).decode() |
||||
|
||||
def get_model(self, model_alias): |
||||
|
||||
models = { |
||||
"standard": "LLM_MODEL", |
||||
"small": "LLM_MODEL_SMALL", |
||||
"vision": "LLM_MODEL_VISION", |
||||
"standard_64k": "LLM_MODEL_LARGE", |
||||
"reasoning": "LLM_MODEL_REASONING", |
||||
"tools": "LLM_MODEL_TOOLS", |
||||
} |
||||
model = os.getenv(models.get(model_alias, "LLM_MODEL")) |
||||
self.model = model |
||||
return model |
||||
|
||||
def count_tokens(self): |
||||
num_tokens = 0 |
||||
for i in self.messages: |
||||
for k, v in i.items(): |
||||
if k == "content": |
||||
if not isinstance(v, str): |
||||
v = str(v) |
||||
tokens = tokenizer.encode(v) |
||||
num_tokens += len(tokens) |
||||
return int(num_tokens) |
||||
|
||||
def get_least_conn_server(self): |
||||
try: |
||||
response = requests.get("http://192.168.1.12:5000/least_conn") |
||||
response.raise_for_status() |
||||
# Extract the least connected server from the response |
||||
least_conn_server = response.headers.get("X-Upstream-Address") |
||||
return least_conn_server |
||||
except requests.RequestException as e: |
||||
print_red("Error getting least connected server:", e) |
||||
return None |
||||
|
||||
def generate( |
||||
self, |
||||
query: str = None, |
||||
user_input: str = None, |
||||
context: str = None, |
||||
stream: bool = False, |
||||
tools: list = None, |
||||
images: list = None, |
||||
model: Optional[ |
||||
Literal["small", "standard", "vision", "reasoning", "tools"] |
||||
] = None, |
||||
temperature: float = None, |
||||
messages: list[dict] = None, |
||||
format = None, |
||||
think = False |
||||
): |
||||
""" |
||||
Generate a response based on the provided query and context. |
||||
Parameters: |
||||
query (str): The query string from the user. |
||||
user_input (str): Additional user input to be appended to the last message. |
||||
context (str): Contextual information to be used in generating the response. |
||||
stream (bool): Whether to stream the response. |
||||
tools (list): List of tools to be used in generating the response. |
||||
images (list): List of images to be included in the response. |
||||
model (Optional[Literal["small", "standard", "vision", "tools"]]): The model type to be used. |
||||
temperature (float): The temperature setting for the model. |
||||
messages (list[dict]): List of previous messages in the conversation. |
||||
format (Optional[BaseModel]): The format of the response. |
||||
think (bool): Whether to use the reasoning model. |
||||
|
||||
Returns: |
||||
str: The generated response or an error message if an exception occurs. |
||||
""" |
||||
print_yellow(stream) |
||||
print_yellow("GENERATE") |
||||
# Prepare the model and temperature |
||||
|
||||
model = self.get_model(model) if model else self.model |
||||
# if model == self.get_model('tools'): |
||||
# stream = False |
||||
temperature = temperature if temperature else self.options["temperature"] |
||||
|
||||
if messages: |
||||
messages = [ |
||||
{"role": i["role"], "content": re.sub(r"\s*\n\s*", "\n", i["content"])} |
||||
for i in messages |
||||
] |
||||
message = messages.pop(-1) |
||||
query = message["content"] |
||||
self.messages = messages |
||||
else: |
||||
# Normalize whitespace and add the query to the messages |
||||
query = re.sub(r"\s*\n\s*", "\n", query) |
||||
message = {"role": "user", "content": query} |
||||
|
||||
# Handle images if any |
||||
if images: |
||||
message = self.prepare_images(images, message) |
||||
model = self.get_model("vision") |
||||
|
||||
self.messages.append(message) |
||||
|
||||
# Prepare headers |
||||
headers = {"Authorization": f"Basic {self.get_credentials()}"} |
||||
if self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: #TODO Maybe reasoning shouldn't be here. |
||||
headers["X-Chosen-Backend"] = self.chosen_backend |
||||
|
||||
if model == self.get_model("small"): |
||||
headers["X-Model-Type"] = "small" |
||||
if model == self.get_model("tools"): |
||||
headers["X-Model-Type"] = "tools" |
||||
|
||||
reasoning_models = ['qwen3', 'deepseek'] #TODO Add more reasoning models here when added to ollama |
||||
if any([model_name in model for model_name in reasoning_models]): |
||||
if think: |
||||
query = f"/think\n{query}" |
||||
else: |
||||
query = f"/no_think\n{query}" |
||||
|
||||
# Prepare options |
||||
options = Options(**self.options) |
||||
options.temperature = temperature |
||||
|
||||
print_yellow("Stream the answer?", stream) |
||||
|
||||
# Call the client.chat method |
||||
try: |
||||
self.call_model = model |
||||
self.client: Client = Client(host=self.host_url, headers=headers, timeout=300) #! |
||||
#print_rainbow(self.client._client.__dict__) |
||||
print_yellow("Model used in call:", model) |
||||
# if headers: |
||||
# self.client.headers.update(headers) |
||||
|
||||
response = self.client.chat( |
||||
model=model, |
||||
messages=self.messages, |
||||
tools=tools, |
||||
stream=stream, |
||||
options=options, |
||||
keep_alive=3600 * 24 * 7, |
||||
format=format |
||||
) |
||||
|
||||
except ResponseError as e: |
||||
print_red("Error!") |
||||
print(e) |
||||
return "An error occurred." |
||||
# print_rainbow(response.__dict__) |
||||
# If user_input is provided, update the last message |
||||
|
||||
if user_input: |
||||
if context: |
||||
if len(context) > 2000: |
||||
context = self.make_summary(context) |
||||
user_input = ( |
||||
f"{user_input}\n\nUse the information below to answer the question.\n" |
||||
f'"""{context}"""\n[This is a summary of the context provided in the original message.]' |
||||
) |
||||
system_message_info = "\nSometimes some of the messages in the chat history are summarised; when that is the case, it is clearly indicated in the message."
||||
if system_message_info not in self.messages[0]["content"]: |
||||
self.messages[0]["content"] += system_message_info |
||||
self.messages[-1] = {"role": "user", "content": user_input} |
||||
|
||||
# self.chosen_backend = self.client.last_response.headers.get("X-Chosen-Backend") |
||||
|
||||
# Handle streaming response |
||||
if stream: |
||||
print_purple("STREAMING") |
||||
return self.read_stream(response) |
||||
else: |
||||
print_purple("NOT STREAMING") |
||||
# Process the response |
||||
if isinstance(response, ChatResponse): |
||||
result = response.message.content.strip('"') |
||||
if '</think>' in result: |
||||
result = result.split('</think>')[-1] |
||||
self.messages.append( |
||||
{"role": "assistant", "content": result.strip('"')} |
||||
) |
||||
if tools and not response.message.get("tool_calls"): |
||||
print_yellow("No tool calls in response".upper()) |
||||
if not self.chat: |
||||
self.messages = [self.messages[0]] |
||||
|
||||
if not think: |
||||
response.message.content = remove_thinking(response.message.content) |
||||
return response.message |
||||
else: |
||||
print_red("Unexpected response type") |
||||
return "An error occurred." |
||||
|
||||
def make_summary(self, text): |
||||
# Implement your summary logic using self.client.chat() |
||||
summary_message = { |
||||
"role": "user", |
||||
"content": f'Summarize the text below:\n"""{text}"""\nRemember to be concise and detailed. Answer in English.', |
||||
} |
||||
messages = [ |
||||
{ |
||||
"role": "system", |
||||
"content": "You are summarizing a text. Make it detailed and concise. Answer ONLY with the summary. Don't add any new information.", |
||||
}, |
||||
summary_message, |
||||
] |
||||
try: |
||||
response = self.client.chat( |
||||
model=self.get_model("small"), |
||||
messages=messages, |
||||
options=Options(temperature=0.01), |
||||
keep_alive=3600 * 24 * 7, |
||||
) |
||||
summary = response.message.content.strip() |
||||
print_blue("Summary:", summary) |
||||
return summary |
||||
except ResponseError as e: |
||||
print_red("Error generating summary:", e) |
||||
return "Summary generation failed." |
||||
|
||||
def read_stream(self, response): |
||||
""" |
||||
Yields tuples of (chunk_type, text). The first tuple is ('thinking', ...) |
||||
if in_thinking is True and stops at </think>. After that, yields ('normal', ...) |
||||
for the rest of the text. |
||||
""" |
||||
thinking_buffer = "" |
||||
in_thinking = self.call_model == self.get_model("reasoning") |
||||
first_chunk = True |
||||
prev_content = None |
||||
|
||||
for chunk in response: |
||||
if not chunk: |
||||
continue |
||||
content = chunk.message.content |
||||
|
||||
# Remove leading quote if it's the first chunk |
||||
if first_chunk and content.startswith('"'): |
||||
content = content[1:] |
||||
first_chunk = False |
||||
|
||||
if in_thinking: |
||||
thinking_buffer += content |
||||
if "</think>" in thinking_buffer: |
||||
end_idx = thinking_buffer.index("</think>") + len("</think>") |
||||
yield ("thinking", thinking_buffer[:end_idx]) |
||||
remaining = thinking_buffer[end_idx:].strip('"') |
||||
if chunk.done and remaining: |
||||
yield ("normal", remaining) |
||||
break |
||||
else: |
||||
prev_content = remaining |
||||
in_thinking = False |
||||
else: |
||||
if prev_content: |
||||
yield ("normal", prev_content) |
||||
prev_content = content |
||||
|
||||
if chunk.done: |
||||
if prev_content and prev_content.endswith('"'): |
||||
prev_content = prev_content[:-1] |
||||
if prev_content: |
||||
yield ("normal", prev_content) |
||||
break |
||||
|
||||
self.messages.append({"role": "assistant", "content": ""}) |
||||
|
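# Hedged usage sketch (not in the original file): consuming the (chunk_type, text) tuples
# yielded by read_stream when generate(..., stream=True) is used; "llm" is an assumed LLM() instance.
#
# for kind, text in llm.generate(query="Why is the sky blue?", stream=True):
#     if kind == "thinking":
#         pass  # e.g. hide or log the model's reasoning before </think>
#     else:
#         print(text, end="")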
||||
async def async_generate( |
||||
self, |
||||
query: str = None, |
||||
user_input: str = None, |
||||
context: str = None, |
||||
stream: bool = False, |
||||
tools: list = None, |
||||
images: list = None, |
||||
model: Optional[Literal["small", "standard", "vision"]] = None, |
||||
temperature: float = None, |
||||
): |
||||
""" |
||||
Asynchronously generates a response based on the provided query and other parameters. |
||||
|
||||
Args: |
||||
query (str, optional): The query string to generate a response for. |
||||
user_input (str, optional): Additional user input to be included in the response. |
||||
context (str, optional): Context information to be used in generating the response. |
||||
stream (bool, optional): Whether to stream the response. Defaults to False. |
||||
tools (list, optional): List of tools to be used in generating the response. Will set the model to 'tools'. |
||||
images (list, optional): List of images to be included in the response. |
||||
model (Optional[Literal["small", "standard", "vision", "tools"]], optional): The model to be used for generating the response. |
||||
temperature (float, optional): The temperature setting for the model. |
||||
|
||||
Returns: |
||||
str: The generated response or an error message if an exception occurs. |
||||
|
||||
Raises: |
||||
ResponseError: If an error occurs during the response generation. |
||||
|
||||
Notes: |
||||
- The function prepares the model and temperature settings. |
||||
- It normalizes whitespace in the query and handles images if provided. |
||||
- It prepares headers and options for the request. |
||||
- It adjusts options for long messages and calls the async client's chat method. |
||||
- If user_input is provided, it updates the last message. |
||||
- It updates the chosen backend based on the response headers. |
||||
- It handles streaming responses and processes the response accordingly. |
||||
- It's not necessary to set model to 'tools' if you provide tools as an argument.
||||
""" |
||||
print_yellow("ASYNC GENERATE") |
||||
# Normalize whitespace and add the query to the messages
||||
query = re.sub(r"\s*\n\s*", "\n", query) |
||||
message = {"role": "user", "content": query} |
||||
self.messages.append(message) |
||||
|
||||
# Prepare the model and temperature |
||||
model = self.get_model(model) if model else self.model |
||||
temperature = temperature if temperature else self.options["temperature"] |
||||
|
||||
# Prepare options |
||||
options = Options(**self.options) |
||||
options.temperature = temperature |
||||
|
||||
# Prepare headers |
||||
headers = {} |
||||
|
||||
# Set model depending on the input |
||||
if images: |
||||
message = self.prepare_images(images, message) |
||||
model = self.get_model("vision") |
||||
elif tools: |
||||
model = self.get_model("tools") |
||||
headers["X-Model-Type"] = "tools" |
||||
tools = [Tool(**tool) if isinstance(tool, dict) else tool for tool in tools] |
||||
elif self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: |
||||
headers["X-Chosen-Backend"] = self.chosen_backend |
||||
elif model == self.get_model("small"): |
||||
headers["X-Model-Type"] = "small" |
||||
|
||||
# Adjust options for long messages |
||||
if self.chat or len(self.messages) > 15000: |
||||
num_tokens = self.count_tokens() + self.max_length_answer // 2 |
||||
if num_tokens > 8000 and model not in [ |
||||
self.get_model("vision"), |
||||
self.get_model("tools"), |
||||
]: |
||||
model = self.get_model("standard_64k") |
||||
headers["X-Model-Type"] = "large" |
||||
|
||||
# Call the async client's chat method |
||||
try: |
||||
response = await self.async_client.chat( |
||||
model=model, |
||||
messages=self.messages, |
||||
headers=headers, |
||||
tools=tools, |
||||
stream=stream, |
||||
options=options, |
||||
keep_alive=3600 * 24 * 7, |
||||
) |
||||
except ResponseError as e: |
||||
print_red("Error!") |
||||
print(e) |
||||
return "An error occurred." |
||||
|
||||
# If user_input is provided, update the last message |
||||
if user_input: |
||||
if context: |
||||
if len(context) > 2000: |
||||
context = self.make_summary(context) |
||||
user_input = ( |
||||
f"{user_input}\n\nUse the information below to answer the question.\n" |
||||
f'"""{context}"""\n[This is a summary of the context provided in the original message.]' |
||||
) |
||||
system_message_info = "\nSometimes some of the messages in the chat history are summarised; when that is the case, it is clearly indicated in the message."
||||
if system_message_info not in self.messages[0]["content"]: |
||||
self.messages[0]["content"] += system_message_info |
||||
self.messages[-1] = {"role": "user", "content": user_input} |
||||
|
||||
print_red(self.async_client.last_response.headers.get("X-Chosen-Backend", "No backend")) |
||||
# Update chosen_backend |
||||
if model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: |
||||
self.chosen_backend = self.async_client.last_response.headers.get( |
||||
"X-Chosen-Backend" |
||||
) |
||||
|
||||
# Handle streaming response |
||||
if stream: |
||||
return self.read_stream(response) |
||||
else: |
||||
# Process the response |
||||
if isinstance(response, ChatResponse): |
||||
result = response.message.content.strip('"') |
||||
self.messages.append( |
||||
{"role": "assistant", "content": result.strip('"')} |
||||
) |
||||
if tools and not response.message.get("tool_calls"): |
||||
print_yellow("No tool calls in response".upper()) |
||||
if not self.chat: |
||||
self.messages = [self.messages[0]] |
||||
return result |
||||
else: |
||||
print_red("Unexpected response type") |
||||
return "An error occurred." |
||||
|
||||
def prepare_images(self, images, message): |
||||
""" |
||||
Prepares a list of images by converting them to base64 encoded strings and adds them to the provided message dictionary. |
||||
Args: |
||||
images (list): A list of images, where each image can be a file path (str), a base64 encoded string (str), or bytes. |
||||
message (dict): A dictionary to which the base64 encoded images will be added under the key "images". |
||||
Returns: |
||||
dict: The updated message dictionary with the base64 encoded images added under the key "images". |
||||
Raises: |
||||
ValueError: If an image is not a string or bytes. |
||||
""" |
||||
import base64 |
||||
|
||||
base64_images = [] |
||||
base64_pattern = re.compile(r"^[A-Za-z0-9+/]+={0,2}$") |
||||
|
||||
for image in images: |
||||
if isinstance(image, str): |
||||
if base64_pattern.match(image): |
||||
base64_images.append(image) |
||||
else: |
||||
with open(image, "rb") as image_file: |
||||
base64_images.append( |
||||
base64.b64encode(image_file.read()).decode("utf-8") |
||||
) |
||||
elif isinstance(image, bytes): |
||||
base64_images.append(base64.b64encode(image).decode("utf-8")) |
||||
else: |
||||
print_red("Invalid image type") |
||||
|
||||
message["images"] = base64_images |
||||
# Use the vision model |
||||
|
||||
return message |
||||
|
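# Hedged usage sketch (not in the original file): passing raw image bytes to generate(),
# which routes them through prepare_images() and switches to the vision model.
# The file name is hypothetical.
#
# llm = LLM()
# with open("figure.png", "rb") as f:
#     answer = llm.generate(query="Describe this figure.", images=[f.read()], stream=False)
# print(answer.content)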
||||
def remove_thinking(response): |
||||
"""Remove the thinking section from the response""" |
||||
response_text = response.content if hasattr(response, "content") else str(response) |
||||
if "</think>" in response_text: |
||||
return response_text.split("</think>")[1].strip() |
||||
return response_text |
||||
|
||||
if __name__ == "__main__": |
||||
|
||||
llm = LLM() |
||||
|
||||
result = llm.generate( |
||||
query="I want to add 2 and 2", |
||||
) |
||||
print(result.content) |
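# Hedged sketch (not in the original file): how tool calling is driven elsewhere in this
# codebase (see Bot.process_user_input); the prompt and registry usage are illustrative.
#
# toolbot = LLM(chat=False, model="small")
# response = toolbot.generate("Find studies on sleep", tools=ToolRegistry.get_tools(), stream=False)
# for call in (response.get("tool_calls") or []):
#     print(call.function.get("name"), call.function.arguments)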
||||
@@ -0,0 +1,581 @@
||||
from _llm import LLM |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
llm = LLM() |
||||
|
||||
result = llm.generate( |
||||
query="I want to add 2 and 2", |
||||
think=True, |
||||
) |
||||
print(result) |
||||
# import os |
||||
# import base64 |
||||
# import re |
||||
# from typing import Literal, Optional |
||||
# from pydantic import BaseModel |
||||
# import requests |
||||
# import tiktoken |
||||
# from ollama import ( |
||||
# Client, |
||||
# AsyncClient, |
||||
# ResponseError, |
||||
# ChatResponse, |
||||
# Tool, |
||||
# Options, |
||||
# ) |
||||
|
||||
# import env_manager |
||||
# from colorprinter.print_color import * |
||||
|
||||
# env_manager.set_env() |
||||
|
||||
# tokenizer = tiktoken.get_encoding("cl100k_base") |
||||
|
||||
|
||||
# class LLM: |
||||
# """ |
||||
# LLM class for interacting with an instance of Ollama. |
||||
|
||||
# Attributes: |
||||
# model (str): The model to be used for response generation. |
||||
# system_message (str): The system message to be used in the chat. |
||||
# options (dict): Options for the model, such as temperature. |
||||
# messages (list): List of messages in the chat. |
||||
# max_length_answer (int): Maximum length of the generated answer. |
||||
# chat (bool): Whether the chat mode is enabled. |
||||
# chosen_backend (str): The chosen backend server for the API. |
||||
# client (Client): The client for synchronous API calls. |
||||
# async_client (AsyncClient): The client for asynchronous API calls. |
||||
# tools (list): List of tools to be used in generating the response. |
||||
|
||||
# Methods: |
||||
# __init__(self, system_message, temperature, model, max_length_answer, messages, chat, chosen_backend): |
||||
# Initializes the LLM class with the provided parameters. |
||||
|
||||
# get_model(self, model_alias): |
||||
# Retrieves the model name based on the provided alias. |
||||
|
||||
# count_tokens(self): |
||||
# Counts the number of tokens in the messages. |
||||
|
||||
# get_least_conn_server(self): |
||||
# Retrieves the least connected server from the backend. |
||||
|
||||
# generate(self, query, user_input, context, stream, tools, images, model, temperature): |
||||
# Generates a response based on the provided query and options. |
||||
|
||||
# make_summary(self, text): |
||||
# Generates a summary of the provided text. |
||||
|
||||
# read_stream(self, response): |
||||
# Handles streaming responses. |
||||
|
||||
# async_generate(self, query, user_input, context, stream, tools, images, model, temperature): |
||||
# Asynchronously generates a response based on the provided query and options. |
||||
|
||||
# prepare_images(self, images, message): |
||||
# """ |
||||
|
||||
# def __init__( |
||||
# self, |
||||
# system_message: str = "You are an assistant.", |
||||
# temperature: float = 0.01, |
||||
# model: Optional[ |
||||
# Literal["small", "standard", "vision", "reasoning", "tools"] |
||||
# ] = "standard", |
||||
# max_length_answer: int = 4096, |
||||
# messages: list[dict] = None, |
||||
# chat: bool = True, |
||||
# chosen_backend: str = None, |
||||
# tools: list = None, |
||||
# ) -> None: |
||||
# """ |
||||
# Initialize the assistant with the given parameters. |
||||
|
||||
# Args: |
||||
# system_message (str): The initial system message for the assistant. Defaults to "You are an assistant.". |
||||
# temperature (float): The temperature setting for the model, affecting randomness. Defaults to 0.01. |
||||
# model (Optional[Literal["small", "standard", "vision", "reasoning"]]): The model type to use. Defaults to "standard". |
||||
# max_length_answer (int): The maximum length of the generated answer. Defaults to 4096. |
||||
# messages (list[dict], optional): A list of initial messages. Defaults to None. |
||||
# chat (bool): Whether the assistant is in chat mode. Defaults to True. |
||||
# chosen_backend (str, optional): The backend server to use. If not provided, the least connected server is chosen. |
||||
|
||||
# Returns: |
||||
# None |
||||
# """ |
||||
|
||||
# self.model = self.get_model(model) |
||||
# self.call_model = ( |
||||
# self.model |
||||
# ) # This is set per call to record which model was actually used |
||||
# self.system_message = system_message |
||||
# self.options = {"temperature": temperature} |
||||
# self.messages = messages or [{"role": "system", "content": self.system_message}] |
||||
# self.max_length_answer = max_length_answer |
||||
# self.chat = chat |
||||
|
||||
# if not chosen_backend: |
||||
# chosen_backend = self.get_least_conn_server() |
||||
# self.chosen_backend = chosen_backend |
||||
|
||||
|
||||
# headers = { |
||||
# "Authorization": f"Basic {self.get_credentials()}", |
||||
# "X-Chosen-Backend": self.chosen_backend, |
||||
# } |
||||
# self.host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/") |
||||
# self.host_url = 'http://192.168.1.12:3300' #! Change back when possible |
||||
# self.client: Client = Client(host=self.host_url, headers=headers, timeout=240) |
||||
# self.async_client: AsyncClient = AsyncClient() |
||||
|
||||
# def get_credentials(self): |
||||
# # Initialize the client with the host and default headers |
||||
# credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}" |
||||
# return base64.b64encode(credentials.encode()).decode() |
||||
|
||||
# def get_model(self, model_alias): |
||||
|
||||
# models = { |
||||
# "standard": "LLM_MODEL", |
||||
# "small": "LLM_MODEL_SMALL", |
||||
# "vision": "LLM_MODEL_VISION", |
||||
# "standard_64k": "LLM_MODEL_LARGE", |
||||
# "reasoning": "LLM_MODEL_REASONING", |
||||
# "tools": "LLM_MODEL_TOOLS", |
||||
# } |
||||
# model = os.getenv(models.get(model_alias, "LLM_MODEL")) |
||||
# self.model = model |
||||
# return model |
||||
|
||||
# def count_tokens(self): |
||||
# num_tokens = 0 |
||||
# for i in self.messages: |
||||
# for k, v in i.items(): |
||||
# if k == "content": |
||||
# if not isinstance(v, str): |
||||
# v = str(v) |
||||
# tokens = tokenizer.encode(v) |
||||
# num_tokens += len(tokens) |
||||
# return int(num_tokens) |
||||
|
||||
# def get_least_conn_server(self): |
||||
# try: |
||||
# response = requests.get("http://192.168.1.12:5000/least_conn") |
||||
# response.raise_for_status() |
||||
# # Extract the least connected server from the response |
||||
# least_conn_server = response.headers.get("X-Upstream-Address") |
||||
# return least_conn_server |
||||
# except requests.RequestException as e: |
||||
# print_red("Error getting least connected server:", e) |
||||
# return None |
||||
|
||||
# def generate( |
||||
# self, |
||||
# query: str = None, |
||||
# user_input: str = None, |
||||
# context: str = None, |
||||
# stream: bool = False, |
||||
# tools: list = None, |
||||
# images: list = None, |
||||
# model: Optional[ |
||||
# Literal["small", "standard", "vision", "reasoning", "tools"] |
||||
# ] = None, |
||||
# temperature: float = None, |
||||
# messages: list[dict] = None, |
||||
# format: BaseModel = None, |
||||
# think: bool = False |
||||
# ): |
||||
# """ |
||||
# Generate a response based on the provided query and context. |
||||
# Parameters: |
||||
# query (str): The query string from the user. |
||||
# user_input (str): Additional user input to be appended to the last message. |
||||
# context (str): Contextual information to be used in generating the response. |
||||
# stream (bool): Whether to stream the response. |
||||
# tools (list): List of tools to be used in generating the response. |
||||
# images (list): List of images to be included in the response. |
||||
# model (Optional[Literal["small", "standard", "vision", "tools"]]): The model type to be used. |
||||
# temperature (float): The temperature setting for the model. |
||||
# messages (list[dict]): List of previous messages in the conversation. |
||||
# format (Optional[BaseModel]): The format of the response. |
||||
# think (bool): Whether to use the reasoning model. |
||||
|
||||
# Returns: |
||||
# str: The generated response or an error message if an exception occurs. |
||||
# """ |
||||
|
||||
# # Prepare the model and temperature |
||||
|
||||
# model = self.get_model(model) if model else self.model |
||||
# # if model == self.get_model('tools'): |
||||
# # stream = False |
||||
# temperature = temperature if temperature else self.options["temperature"] |
||||
|
||||
# if messages: |
||||
# messages = [ |
||||
# {"role": i["role"], "content": re.sub(r"\s*\n\s*", "\n", i["content"])} |
||||
# for i in messages |
||||
# ] |
||||
# message = messages.pop(-1) |
||||
# query = message["content"] |
||||
# self.messages = messages |
||||
# else: |
||||
# # Normalize whitespace and add the query to the messages |
||||
# query = re.sub(r"\s*\n\s*", "\n", query) |
||||
# message = {"role": "user", "content": query} |
||||
|
||||
# # Handle images if any |
||||
# if images: |
||||
# message = self.prepare_images(images, message) |
||||
# model = self.get_model("vision") |
||||
|
||||
# self.messages.append(message) |
||||
|
||||
# # Prepare headers |
||||
# headers = {"Authorization": f"Basic {self.get_credentials()}"} |
||||
# if self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: #TODO Maybe reasoning shouldn't be here. |
||||
# headers["X-Chosen-Backend"] = self.chosen_backend |
||||
|
||||
# if model == self.get_model("small"): |
||||
# headers["X-Model-Type"] = "small" |
||||
# if model == self.get_model("tools"): |
||||
# headers["X-Model-Type"] = "tools" |
||||
|
||||
# reasoning_models = ['qwen3', 'deepseek'] #TODO Add more reasoning models here when added to ollama |
||||
# if any([model_name in model for model_name in reasoning_models]): |
||||
# if think: |
||||
# self.messages[-1]['content'] = f"/think\n{self.messages[-1]['content']}" |
||||
# else: |
||||
# self.messages[-1]['content'] = f"/no_think\n{self.messages[-1]['content']}" |
||||
|
||||
# # Prepare options |
||||
# options = Options(**self.options) |
||||
# options.temperature = temperature |
||||
|
||||
# # Call the client.chat method |
||||
# try: |
||||
# self.call_model = model |
||||
# self.client: Client = Client(host=self.host_url, headers=headers, timeout=300) #! |
||||
# #print_rainbow(self.client._client.__dict__) |
||||
# print_yellow(f"🤖 Generating using {model}...") |
||||
# # if headers: |
||||
# # self.client.headers.update(headers) |
||||
# response = self.client.chat( |
||||
# model=model, |
||||
# messages=self.messages, |
||||
# tools=tools, |
||||
# stream=stream, |
||||
# options=options, |
||||
# keep_alive=3600 * 24 * 7, |
||||
# format=format |
||||
# ) |
||||
|
||||
# except ResponseError as e: |
||||
# print_red("Error!") |
||||
# print(e) |
||||
# return "An error occurred." |
||||
# # print_rainbow(response.__dict__) |
||||
# # If user_input is provided, update the last message |
||||
|
||||
# if user_input: |
||||
# if context: |
||||
# if len(context) > 2000: |
||||
# context = self.make_summary(context) |
||||
# user_input = ( |
||||
# f"{user_input}\n\nUse the information below to answer the question.\n" |
||||
# f'"""{context}"""\n[This is a summary of the context provided in the original message.]' |
||||
# ) |
||||
# system_message_info = "\nSometimes some of the messages in the chat history are summarised; when that is the case, it is clearly indicated in the message." |
||||
# if system_message_info not in self.messages[0]["content"]: |
||||
# self.messages[0]["content"] += system_message_info |
||||
# self.messages[-1] = {"role": "user", "content": user_input} |
||||
|
||||
# # self.chosen_backend = self.client.last_response.headers.get("X-Chosen-Backend") |
||||
|
||||
# # Handle streaming response |
||||
# if stream: |
||||
# print_purple("STREAMING") |
||||
# return self.read_stream(response) |
||||
# else: |
||||
# # Process the response |
||||
# if isinstance(response, ChatResponse): |
||||
# result = response.message.content.strip('"') |
||||
# if '</think>' in result: |
||||
# result = result.split('</think>')[-1] |
||||
# self.messages.append( |
||||
# {"role": "assistant", "content": result.strip('"')} |
||||
# ) |
||||
# if tools and not response.message.get("tool_calls"): |
||||
# print_yellow("No tool calls in response".upper()) |
||||
# if not self.chat: |
||||
# self.messages = [self.messages[0]] |
||||
|
||||
# if not think: |
||||
# response.message.content = remove_thinking(response.message.content) |
||||
# return response.message |
||||
# else: |
||||
# print_red("Unexpected response type") |
||||
# return "An error occurred." |
||||
|
||||
# def make_summary(self, text): |
||||
# # Implement your summary logic using self.client.chat() |
||||
# summary_message = { |
||||
# "role": "user", |
||||
# "content": f'Summarize the text below:\n"""{text}"""\nRemember to be concise and detailed. Answer in English.', |
||||
# } |
||||
# messages = [ |
||||
# { |
||||
# "role": "system", |
||||
# "content": "You are summarizing a text. Make it detailed and concise. Answer ONLY with the summary. Don't add any new information.", |
||||
# }, |
||||
# summary_message, |
||||
# ] |
||||
# try: |
||||
# response = self.client.chat( |
||||
# model=self.get_model("small"), |
||||
# messages=messages, |
||||
# options=Options(temperature=0.01), |
||||
# keep_alive=3600 * 24 * 7, |
||||
# ) |
||||
# summary = response.message.content.strip() |
||||
# print_blue("Summary:", summary) |
||||
# return summary |
||||
# except ResponseError as e: |
||||
# print_red("Error generating summary:", e) |
||||
# return "Summary generation failed." |
||||
|
||||
# def read_stream(self, response): |
||||
# """ |
||||
# Yields tuples of (chunk_type, text). The first tuple is ('thinking', ...) |
||||
# if in_thinking is True and stops at </think>. After that, yields ('normal', ...) |
||||
# for the rest of the text. |
||||
# """ |
||||
# thinking_buffer = "" |
||||
# in_thinking = self.call_model == self.get_model("reasoning") |
||||
# first_chunk = True |
||||
# prev_content = None |
||||
|
||||
# for chunk in response: |
||||
# if not chunk: |
||||
# continue |
||||
# content = chunk.message.content |
||||
|
||||
# # Remove leading quote if it's the first chunk |
||||
# if first_chunk and content.startswith('"'): |
||||
# content = content[1:] |
||||
# first_chunk = False |
||||
|
||||
# if in_thinking: |
||||
# thinking_buffer += content |
||||
# if "</think>" in thinking_buffer: |
||||
# end_idx = thinking_buffer.index("</think>") + len("</think>") |
||||
# yield ("thinking", thinking_buffer[:end_idx]) |
||||
# remaining = thinking_buffer[end_idx:].strip('"') |
||||
# if chunk.done and remaining: |
||||
# yield ("normal", remaining) |
||||
# break |
||||
# else: |
||||
# prev_content = remaining |
||||
# in_thinking = False |
||||
# else: |
||||
# if prev_content: |
||||
# yield ("normal", prev_content) |
||||
# prev_content = content |
||||
|
||||
# if chunk.done: |
||||
# if prev_content and prev_content.endswith('"'): |
||||
# prev_content = prev_content[:-1] |
||||
# if prev_content: |
||||
# yield ("normal", prev_content) |
||||
# break |
||||
|
||||
# self.messages.append({"role": "assistant", "content": ""}) |
||||
|
||||
# async def async_generate( |
||||
# self, |
||||
# query: str = None, |
||||
# user_input: str = None, |
||||
# context: str = None, |
||||
# stream: bool = False, |
||||
# tools: list = None, |
||||
# images: list = None, |
||||
# model: Optional[Literal["small", "standard", "vision"]] = None, |
||||
# temperature: float = None, |
||||
# ): |
||||
# """ |
||||
# Asynchronously generates a response based on the provided query and other parameters. |
||||
|
||||
# Args: |
||||
# query (str, optional): The query string to generate a response for. |
||||
# user_input (str, optional): Additional user input to be included in the response. |
||||
# context (str, optional): Context information to be used in generating the response. |
||||
# stream (bool, optional): Whether to stream the response. Defaults to False. |
||||
# tools (list, optional): List of tools to be used in generating the response. Will set the model to 'tools'. |
||||
# images (list, optional): List of images to be included in the response. |
||||
# model (Optional[Literal["small", "standard", "vision", "tools"]], optional): The model to be used for generating the response. |
||||
# temperature (float, optional): The temperature setting for the model. |
||||
|
||||
# Returns: |
||||
# str: The generated response or an error message if an exception occurs. |
||||
|
||||
# Raises: |
||||
# ResponseError: If an error occurs during the response generation. |
||||
|
||||
# Notes: |
||||
# - The function prepares the model and temperature settings. |
||||
# - It normalizes whitespace in the query and handles images if provided. |
||||
# - It prepares headers and options for the request. |
||||
# - It adjusts options for long messages and calls the async client's chat method. |
||||
# - If user_input is provided, it updates the last message. |
||||
# - It updates the chosen backend based on the response headers. |
||||
# - It handles streaming responses and processes the response accordingly. |
||||
# - It's not necessary to set model to 'tools' if you provide tools as an argument. |
||||
# """ |
||||
# print_yellow("ASYNC GENERATE") |
||||
# # Normalize whitespace and add the query to the messages |
||||
# query = re.sub(r"\s*\n\s*", "\n", query) |
||||
# message = {"role": "user", "content": query} |
||||
# self.messages.append(message) |
||||
|
||||
# # Prepare the model and temperature |
||||
# model = self.get_model(model) if model else self.model |
||||
# temperature = temperature if temperature else self.options["temperature"] |
||||
|
||||
# # Prepare options |
||||
# options = Options(**self.options) |
||||
# options.temperature = temperature |
||||
|
||||
# # Prepare headers |
||||
# headers = {} |
||||
|
||||
# # Set model depending on the input |
||||
# if images: |
||||
# message = self.prepare_images(images, message) |
||||
# model = self.get_model("vision") |
||||
# elif tools: |
||||
# model = self.get_model("tools") |
||||
# headers["X-Model-Type"] = "tools" |
||||
# tools = [Tool(**tool) if isinstance(tool, dict) else tool for tool in tools] |
||||
# elif self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: |
||||
# headers["X-Chosen-Backend"] = self.chosen_backend |
||||
# elif model == self.get_model("small"): |
||||
# headers["X-Model-Type"] = "small" |
||||
|
||||
# # Adjust options for long messages |
||||
# if self.chat or len(self.messages) > 15000: |
||||
# num_tokens = self.count_tokens() + self.max_length_answer // 2 |
||||
# if num_tokens > 8000 and model not in [ |
||||
# self.get_model("vision"), |
||||
# self.get_model("tools"), |
||||
# ]: |
||||
# model = self.get_model("standard_64k") |
||||
# headers["X-Model-Type"] = "large" |
||||
|
||||
# # Call the async client's chat method |
||||
# try: |
||||
# response = await self.async_client.chat( |
||||
# model=model, |
||||
# messages=self.messages, |
||||
# headers=headers, |
||||
# tools=tools, |
||||
# stream=stream, |
||||
# options=options, |
||||
# keep_alive=3600 * 24 * 7, |
||||
# ) |
||||
# except ResponseError as e: |
||||
# print_red("Error!") |
||||
# print(e) |
||||
# return "An error occurred." |
||||
|
||||
# # If user_input is provided, update the last message |
||||
# if user_input: |
||||
# if context: |
||||
# if len(context) > 2000: |
||||
# context = self.make_summary(context) |
||||
# user_input = ( |
||||
# f"{user_input}\n\nUse the information below to answer the question.\n" |
||||
# f'"""{context}"""\n[This is a summary of the context provided in the original message.]' |
||||
# ) |
||||
# system_message_info = "\nSometimes some of the messages in the chat history are summarised; when that is the case, it is clearly indicated in the message." |
||||
# if system_message_info not in self.messages[0]["content"]: |
||||
# self.messages[0]["content"] += system_message_info |
||||
# self.messages[-1] = {"role": "user", "content": user_input} |
||||
|
||||
# print_red(self.async_client.last_response.headers.get("X-Chosen-Backend", "No backend")) |
||||
# # Update chosen_backend |
||||
# if model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: |
||||
# self.chosen_backend = self.async_client.last_response.headers.get( |
||||
# "X-Chosen-Backend" |
||||
# ) |
||||
|
||||
# # Handle streaming response |
||||
# if stream: |
||||
# return self.read_stream(response) |
||||
# else: |
||||
# # Process the response |
||||
# if isinstance(response, ChatResponse): |
||||
# result = response.message.content.strip('"') |
||||
# self.messages.append( |
||||
# {"role": "assistant", "content": result.strip('"')} |
||||
# ) |
||||
# if tools and not response.message.get("tool_calls"): |
||||
# print_yellow("No tool calls in response".upper()) |
||||
# if not self.chat: |
||||
# self.messages = [self.messages[0]] |
||||
# return result |
||||
# else: |
||||
# print_red("Unexpected response type") |
||||
# return "An error occurred." |
||||
|
||||
# def prepare_images(self, images, message): |
||||
# """ |
||||
# Prepares a list of images by converting them to base64 encoded strings and adds them to the provided message dictionary. |
||||
# Args: |
||||
# images (list): A list of images, where each image can be a file path (str), a base64 encoded string (str), or bytes. |
||||
# message (dict): A dictionary to which the base64 encoded images will be added under the key "images". |
||||
# Returns: |
||||
# dict: The updated message dictionary with the base64 encoded images added under the key "images". |
||||
# Raises: |
||||
# ValueError: If an image is not a string or bytes. |
||||
# """ |
||||
# import base64 |
||||
|
||||
# base64_images = [] |
||||
# base64_pattern = re.compile(r"^[A-Za-z0-9+/]+={0,2}$") |
||||
|
||||
# for image in images: |
||||
# if isinstance(image, str): |
||||
# if base64_pattern.match(image): |
||||
# base64_images.append(image) |
||||
# else: |
||||
# with open(image, "rb") as image_file: |
||||
# base64_images.append( |
||||
# base64.b64encode(image_file.read()).decode("utf-8") |
||||
# ) |
||||
# elif isinstance(image, bytes): |
||||
# base64_images.append(base64.b64encode(image).decode("utf-8")) |
||||
# else: |
||||
# print_red("Invalid image type") |
||||
|
||||
# message["images"] = base64_images |
||||
# # Use the vision model |
||||
|
||||
# return message |
||||
|
||||
# def remove_thinking(response): |
||||
# """Remove the thinking section from the response""" |
||||
# response_text = response.content if hasattr(response, "content") else str(response) |
||||
# if "</think>" in response_text: |
||||
# return response_text.split("</think>")[1].strip() |
||||
# return response_text |
||||
|
||||
# if __name__ == "__main__": |
||||
|
||||
# llm = LLM() |
||||
|
||||
# result = llm.generate( |
||||
# query="I want to add 2 and 2", |
||||
# ) |
||||
# print(result.content) |
||||
File diff suppressed because it is too large
@ -0,0 +1,334 @@ |
||||
from pydantic import BaseModel, Field |
||||
from typing import Dict, List, Tuple, Optional, Any |
||||
|
||||
class ArticleChunk(BaseModel): |
||||
summary: str |
||||
tags: List[str] |
||||
references: Optional[List[str]] |
||||
|
||||
|
||||
class QueryResponse(BaseModel): |
||||
""" |
||||
Represents a query generated for retrieving documents from a vector database. |
||||
|
||||
Attributes: |
||||
query (str): The generated query text, short and concise. |
||||
""" |
||||
|
||||
query: str = Field( |
||||
description="The generated query that will be used to retrieve documents from a vector database (ChromaDB). Should be short and concise.", |
||||
example="capital of France", |
||||
) |
||||
|
||||
class ArticleMetadataResponse(BaseModel): |
||||
""" |
||||
Represents structured metadata extracted from an article by an LLM. |
||||
""" |
||||
published_date: Optional[str] = Field( |
||||
description="The publication date of the article in YYYY-MM-DD format." |
||||
) |
||||
title: str = Field( |
||||
description="The full title of the article." |
||||
) |
||||
journal: Optional[str] = Field( |
||||
description="The name of the journal/paper/outlet where the article was published." |
||||
) |
||||
|
||||
|
||||
class PlanEvaluationResponse(BaseModel): |
||||
""" |
||||
Represents the evaluation of a plan's step. |
||||
|
||||
Attributes: |
||||
reasoning (str): Explanation of the reasoning behind the evaluation. |
||||
complete (bool): Indicates if the step has sufficient information to proceed. |
||||
""" |
||||
|
||||
reasoning: str = Field( |
||||
description="A short explanation of the reasoning behind the evaluation", |
||||
example="Although some information is missing, the existing data is sufficient to complete the step.", |
||||
) |
||||
complete: bool = Field( |
||||
description="Indicates whether the information is sufficient to complete the step", |
||||
example=False, |
||||
) |
||||
|
||||
|
||||
class EvaluateFormat(BaseModel): |
||||
""" |
||||
Represents the evaluation format for determining sufficiency of information. |
||||
|
||||
Attributes: |
||||
explanation (str): Explanation of whether the information is sufficient. |
||||
status (bool): Indicates sufficiency of the information. |
||||
additional_info (Optional[str]): Additional information needed if insufficient. |
||||
""" |
||||
|
||||
explanation: str = Field( |
||||
description="A very short explanation of whether the information is sufficient or not", |
||||
example="The information is sufficient because...", |
||||
) |
||||
status: bool = Field( |
||||
description="If the information is sufficient to complete the step or not.", |
||||
example=True, |
||||
) |
||||
additional_info: Optional[str] = Field( |
||||
description="If the information is not sufficient, what additional information would be needed", |
||||
example="We need more information about...", |
||||
) |
||||
|
||||
|
||||
class Plan(BaseModel): |
||||
""" |
||||
Represents a structured plan with steps and corresponding tasks or facts. |
||||
|
||||
Attributes: |
||||
steps (Dict[str, List[Tuple[str, str]]]): A dictionary where keys are step names and values are lists of tasks or facts. |
||||
""" |
||||
|
||||
steps: Dict[str, List[Tuple[str, str]]] = Field( |
||||
description="Structured plan represented as steps with their corresponding tasks or facts", |
||||
example={ |
||||
"Step 1: Gather Existing Materials": [ |
||||
("Task 1", "Description of task"), |
||||
("Task 2", "Description of task"), |
||||
], |
||||
"Step 2: Extract Relevant Information": [ |
||||
("Task 1", "Description of task"), |
||||
("Task 2", "Description of task"), |
||||
], |
||||
}, |
||||
) |
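A minimal sketch of filling Plan from an LLM answer, assuming an LLM client whose generate method accepts the JSON schema through its format argument and returns a message with a content string (the same pattern HelperBot.make_structured_plan uses later in this changeset):

llm = LLM()  # assumed to be the LLM class from _llm
raw = llm.generate(
    "Turn the plan above into structured steps with subtasks.",
    format=Plan.model_json_schema(),
)
plan = Plan.model_validate_json(raw.content)  # raises pydantic.ValidationError on malformed JSON
for step, tasks in plan.steps.items():
    print(step, tasks)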
||||
|
||||
|
||||
class ChunkMetadata(BaseModel): |
||||
""" |
||||
Metadata associated with a document chunk. |
||||
|
||||
Attributes: |
||||
title (str): Title of the document chunk. |
||||
journal (Optional[str]): Journal where the document was published. |
||||
published_date (Optional[str]): Date of publication. |
||||
user_notes (Optional[str]): User-provided notes. |
||||
arango_id (Optional[str]): Unique identifier for the document in ArangoDB. |
||||
additional_metadata (Dict[str, Any]): Any additional metadata fields. |
||||
doi (Optional[str]): Digital Object Identifier for the document. |
||||
link: (Optional[str]): URL to access the document. |
||||
authors (Optional[List[str]]): List of authors of the document. |
||||
published_year (Optional[int]): Year of publication. |
||||
abstract: (Optional[str]): Abstract of the document. |
||||
pages: (Optional[str]): Page numbers of the document. |
||||
chroma_id (Optional[str]): Unique identifier for the chunk in ChromaDB. |
||||
""" |
||||
|
||||
title: str = Field(default="No title", description="Title of the document chunk.") |
||||
journal: Optional[str] = None |
||||
published_date: Optional[str] = None |
||||
user_notes: Optional[str] = None |
||||
arango_id: Optional[str] = None  # ArangoDB document identifier |
||||
additional_metadata: Dict[str, Any] = Field(default_factory=dict) |
||||
doi: Optional[str] = None |
||||
link: Optional[str] = None |
||||
authors: Optional[List[str]] = Field( |
||||
default_factory=list, |
||||
description="List of authors of the document.", |
||||
) |
||||
published_year: Optional[int] = Field( |
||||
default=None, |
||||
description="Year of publication.", |
||||
) |
||||
abstract: Optional[str] = Field( |
||||
default=None, |
||||
description="Abstract of the document.", |
||||
) |
||||
pages: Optional[str] = Field( |
||||
default=None, |
||||
description="Page numbers of the document.", |
||||
) |
||||
chroma_id: Optional[str] = Field( |
||||
default=None, |
||||
description="Unique identifier for the chunk in ChromaDB.", |
||||
) |
||||
|
||||
|
||||
class DocumentChunk(BaseModel): |
||||
""" |
||||
Represents a chunk of text from a document with its metadata. |
||||
|
||||
Attributes: |
||||
document (str): The text content of the chunk. |
||||
metadata (ChunkMetadata): Metadata associated with the chunk. |
||||
""" |
||||
|
||||
document: str |
||||
metadata: ChunkMetadata |
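For illustration, a chunk and its metadata can be constructed directly; all values below are placeholders:

example_chunk = DocumentChunk(
    document="Rising sea temperatures have led to increased coral bleaching events.",
    metadata=ChunkMetadata(
        title="The Impact of Climate Change on Coral Reefs",
        published_year=2023,
        pages="1-2",
    ),
)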
||||
|
||||
|
||||
|
||||
|
||||
class UnifiedDataChunk(BaseModel): |
||||
""" |
||||
Represents a unified chunk of data from any source. |
||||
|
||||
Attributes: |
||||
content (str): The main content of the chunk (e.g., text, note, or document). |
||||
metadata (Optional[Dict[str, Any]]): Metadata associated with the chunk. |
||||
source_type (str): The type of source (e.g., 'note', 'article', 'document'). |
||||
""" |
||||
|
||||
content: str = Field( |
||||
description="The main content of the chunk (e.g., text, note, or document)." |
||||
) |
||||
metadata: Optional[ChunkMetadata] = Field( |
||||
description="Metadata associated with the chunk (e.g., title, source, date).", |
||||
) |
||||
source_type: str = Field( |
||||
description="The type of source (e.g., 'note', 'article', 'document')." |
||||
) |
||||
|
||||
|
||||
class UnifiedSearchResults(BaseModel): |
||||
""" |
||||
Represents unified search results from any search tool. |
||||
|
||||
Attributes: |
||||
chunks (List[UnifiedDataChunk]): List of data chunks from the search. |
||||
source_ids (List[str]): List of unique source IDs for the chunks. |
||||
""" |
||||
|
||||
chunks: List[UnifiedDataChunk] = Field( |
||||
description="List of data chunks from the search." |
||||
) |
||||
source_ids: List[str] = Field( |
||||
default_factory=list, description="List of unique source IDs for the chunks." |
||||
) |
||||
|
||||
|
||||
class UnifiedToolResponse(BaseModel): |
||||
""" |
||||
Represents a unified response from any tool. |
||||
|
||||
Attributes: |
||||
search_results (Optional[UnifiedSearchResults]): The unified search results, returned when the tool performs a search. |
||||
text_results (Optional[List[str]]): Text results from the tool, e.g., when the tool performs an analysis. |
||||
tool_names (Optional[List[str]]): The names of the tools used to generate the response. |
||||
""" |
||||
|
||||
search_results: Optional[UnifiedSearchResults] = Field( |
||||
default=None, |
||||
description="The unified search results, if the tools used is returning search results.", |
||||
) |
||||
text_results: Optional[list[str]] = Field( |
||||
default=None, |
||||
description="Text results from the tool, e.g., if the tool is an analysis.", |
||||
) |
||||
tool_names: Optional[list[str]] = Field( |
||||
default=None, description="The name of the tool used to generate the response." |
||||
) |
||||
|
||||
def extend_search_results(self, search_results: UnifiedSearchResults) -> None: |
||||
""" |
||||
Extends the search results with additional data. |
||||
|
||||
Args: |
||||
search_results (UnifiedSearchResults): The new search results to extend. |
||||
""" |
||||
if self.search_results is None: |
||||
self.search_results = search_results |
||||
else: |
||||
self.search_results.chunks.extend(search_results.chunks) |
||||
self.search_results.source_ids.extend(search_results.source_ids) |
||||
|
||||
def extend_text_results(self, text_result: str) -> None: |
||||
""" |
||||
Extends the text result with additional data. |
||||
|
||||
Args: |
||||
text_result (str): The new text result to extend. |
||||
""" |
||||
if self.text_results is None: |
||||
self.text_results = [text_result] |
||||
else: |
||||
self.text_results.append(text_result) |
||||
|
||||
def extend_tool_name(self, tool_name: str) -> None: |
||||
""" |
||||
Extends the tool name with additional data. |
||||
|
||||
Args: |
||||
tool_name (str): The new tool name to extend. |
||||
""" |
||||
if self.tool_names is None: |
||||
self.tool_names = [tool_name] |
||||
else: |
||||
self.tool_names.append(tool_name) |
||||
|
||||
@property |
||||
def to_text(self) -> str: |
||||
""" |
||||
Generates formatted text from search results or returns the text result. |
||||
|
||||
If search_results exists, formats content from each chunk along with its source. |
||||
Otherwise, returns the text_result if available. |
||||
|
||||
Returns: |
||||
str: The formatted text from the search results, the joined text results, or a fallback message when neither is available. |
||||
""" |
||||
if self.search_results and self.search_results.chunks: |
||||
formatted_chunks = [] |
||||
for i, chunk in enumerate(self.search_results.chunks): |
||||
# Handle UnifiedDataChunk structure |
||||
content = chunk.content |
||||
metadata = chunk.metadata or ChunkMetadata()  # fall back to default metadata ("No title") when none is attached |
||||
|
||||
source_info = f"Source: {metadata.title}" |
||||
if metadata.journal: |
||||
source_info += f" - {metadata.journal}" |
||||
if metadata.published_date: |
||||
source_info += f" ({metadata.published_date})" |
||||
|
||||
# Format the chunk with its content and source |
||||
formatted_chunk = f"### Chunk {i+1}\n{content}\n\n*{source_info}*\n" |
||||
formatted_chunks.append(formatted_chunk) |
||||
|
||||
return "\n---\n".join(formatted_chunks) |
||||
elif self.text_results: |
||||
return '\n---\n'.join(self.text_results) |
||||
else: |
||||
return "No search results or text results available." |
||||
|
||||
|
||||
@property |
||||
def get_chroma_ids(self) -> List[str]: |
||||
""" |
||||
Returns the list of Chroma IDs from the search results. |
||||
|
||||
Returns: |
||||
List[str]: The list of Chroma IDs. |
||||
""" |
||||
if self.search_results and self.search_results.source_ids: |
||||
return self.search_results.source_ids |
||||
return [] |
||||
|
||||
class ChunkSearchResults(BaseModel): |
||||
""" |
||||
Represents the results of a search query across document collections. |
||||
|
||||
Attributes: |
||||
chunks (List[UnifiedDataChunk]): List of document chunks containing text and metadata. |
||||
chroma_ids (List[str]): List of Chroma IDs for the chunks. |
||||
arango_ids (List[str]): List of ArangoDB IDs for the related documents. |
||||
""" |
||||
|
||||
chunks: List[UnifiedDataChunk] = Field( |
||||
description="List of document chunks containing text, metadata, and relevance scores." |
||||
) |
||||
chroma_ids: List[str] = Field( |
||||
default_factory=list, description="List of Chroma IDs for the chunks" |
||||
) |
||||
arango_ids: List[str] = Field( |
||||
default_factory=list, |
||||
description="List of ArangoDB IDs for the related documents", |
||||
) |
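A minimal usage sketch of the unified response models defined above; the tool name, content, and IDs are placeholders:

response = UnifiedToolResponse()
response.extend_search_results(
    UnifiedSearchResults(
        chunks=[
            UnifiedDataChunk(
                content="Coral bleaching accelerates above a 1-2°C temperature rise.",
                metadata=ChunkMetadata(title="Reef study"),
                source_type="article",
            )
        ],
        source_ids=["chroma/abc123"],
    )
)
response.extend_tool_name("vector_search")
print(response.to_text)         # formatted chunks, each followed by a source line
print(response.get_chroma_ids)  # ["chroma/abc123"]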
||||
@ -1,6 +0,0 @@ |
||||
from pydantic import BaseModel |
||||
|
||||
class QueryResponse(BaseModel): |
||||
query_to_vector_database: str |
||||
short_explanation: str |
||||
|
||||
File diff suppressed because it is too large
@ -1,31 +0,0 @@ |
||||
from TTS.api import TTS |
||||
import torch |
||||
from datetime import datetime |
||||
tts = TTS("tts_models/en/multi-dataset/tortoise-v2") |
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
||||
tts.to(device) |
||||
text="There is, therefore, an increasing need to understand BEVs from a systems perspective. This involves an in-depth consideration of the environmental impact of the product using life cycle assessment (LCA) as well as taking a broader 'circular economy' approach. On the one hand, LCA is a means of assessing the environmental impact associated with all stages of a product's life from cradle to grave: from raw material extraction and processing to the product's manufacture to its use in everyday life and finally to its end of life." |
||||
|
||||
|
||||
# cloning `lj` voice from `TTS/tts/utils/assets/tortoise/voices/lj` |
||||
# with custom inference settings overriding defaults. |
||||
time_now = datetime.now().strftime("%Y%m%d%H%M%S") |
||||
output_path = f"output/tortoise_{time_now}.wav" |
||||
tts.tts_to_file(text, |
||||
file_path=output_path, |
||||
voice_dir="voices", |
||||
speaker="test", |
||||
split_sentences=False, # Change to True if context is not enough |
||||
num_autoregressive_samples=20, |
||||
diffusion_iterations=50) |
||||
|
||||
# # Using presets with the same voice |
||||
# tts.tts_to_file(text, |
||||
# file_path="output.wav", |
||||
# voice_dir="path/to/tortoise/voices/dir/", |
||||
# speaker="lj", |
||||
# preset="ultra_fast") |
||||
|
||||
# # Random voice generation |
||||
# tts.tts_to_file(text, |
||||
# file_path="output.wav") |
||||
@ -0,0 +1,209 @@ |
||||
#!/usr/bin/env python3 |
||||
""" |
||||
Test LLM Server and View Results |
||||
|
||||
This script sends a test document to the LLM server for summarization, |
||||
waits for processing to complete, and displays the results. |
||||
|
||||
Usage: |
||||
python test_and_view.py [--wait SECONDS] [--retries COUNT] |
||||
|
||||
Options: |
||||
--wait SECONDS Number of seconds to wait between polling attempts (default: 5) |
||||
--retries COUNT Maximum number of polling attempts (default: 20) |
||||
""" |
||||
|
||||
import requests |
||||
import json |
||||
import time |
||||
import os |
||||
import argparse |
||||
import sys |
||||
from _arango import ArangoDB |
||||
|
||||
|
||||
def send_test_document(): |
||||
"""Send a test document to the LLM server for summarization.""" |
||||
print("Sending test document to LLM server...") |
||||
|
||||
# Define server endpoint |
||||
url = "http://localhost:8100/summarise_document" |
||||
|
||||
# Create a sample document with unique ID based on timestamp |
||||
doc_id = f"test_articles/climate_impact_{int(time.time())}" |
||||
|
||||
sample_document = { |
||||
"arango_doc": { |
||||
"text": """ |
||||
The Impact of Climate Change on Coral Reefs |
||||
|
||||
Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. |
||||
Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae, |
||||
leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature |
||||
can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption |
||||
makes it difficult for corals to build their calcium carbonate skeletons. |
||||
|
||||
Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90% |
||||
of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt |
||||
to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts |
||||
for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may |
||||
provide hope for reef preservation. |
||||
""", |
||||
"chunks": [] |
||||
}, |
||||
"arango_db_name": "test_db", |
||||
"arango_id": doc_id, |
||||
"is_sci": True |
||||
} |
||||
|
||||
try: |
||||
# Send request to server |
||||
response = requests.post(url, json=sample_document) |
||||
|
||||
if response.status_code == 200: |
||||
print("✓ Request accepted by server") |
||||
print(f"Document ID: {doc_id}") |
||||
return { |
||||
"db_name": "test_db", |
||||
"doc_id": doc_id |
||||
} |
||||
else: |
||||
print(f"✗ Error: {response.status_code}") |
||||
print(response.text) |
||||
return None |
||||
except Exception as e: |
||||
print(f"✗ Connection error: {e}") |
||||
return None |
||||
|
||||
|
||||
def poll_for_results(doc_info, max_retries=20, wait_time=5): |
||||
"""Poll the database until the document is summarized.""" |
||||
if not doc_info: |
||||
return None |
||||
|
||||
db_name = doc_info["db_name"] |
||||
doc_id = doc_info["doc_id"] |
||||
|
||||
print(f"\nPolling for results in {db_name}/{doc_id}...") |
||||
print(f"Will check every {wait_time} seconds, up to {max_retries} times.") |
||||
|
||||
arango = ArangoDB(db_name=db_name) |
||||
|
||||
for attempt in range(max_retries): |
||||
print(f"Attempt {attempt+1}/{max_retries}... ", end="", flush=True) |
||||
|
||||
try: |
||||
# Get the document from ArangoDB |
||||
document = arango.get_document(doc_id) |
||||
|
||||
# Check if the document has been summarized |
||||
if document and "summary" in document: |
||||
print("✓ Document summary found!") |
||||
return document |
||||
|
||||
print("Document exists but no summary yet") |
||||
time.sleep(wait_time) |
||||
|
||||
except Exception as e: |
||||
print(f"Error: {e}") |
||||
time.sleep(wait_time) |
||||
|
||||
print("\n✗ Summarization not completed after maximum retries.") |
||||
return None |
||||
|
||||
|
||||
def display_results(document): |
||||
"""Display the summarization results.""" |
||||
if not document: |
||||
print("\nNo results to display") |
||||
return |
||||
|
||||
print("\n" + "=" * 80) |
||||
print(f"RESULTS FOR DOCUMENT: {document.get('_id', 'Unknown')}") |
||||
print("=" * 80) |
||||
|
||||
# Document summary |
||||
print("\n📄 DOCUMENT SUMMARY") |
||||
print("-" * 80) |
||||
print(document["summary"]["text_sum"]) |
||||
|
||||
# Model info if available |
||||
if "meta" in document["summary"]: |
||||
meta = document["summary"]["meta"] |
||||
model = meta.get("model", "Unknown") |
||||
temp = meta.get("temperature", "Unknown") |
||||
print(f"\nGenerated using: {model} (temperature: {temp})") |
||||
|
||||
# Check for summarized chunks |
||||
if "chunks" in document and document["chunks"]: |
||||
summarized_chunks = [chunk for chunk in document["chunks"] if "summary" in chunk] |
||||
print(f"\n🧩 CHUNK SUMMARIES ({len(summarized_chunks)}/{len(document['chunks'])} chunks processed)") |
||||
|
||||
for i, chunk in enumerate(summarized_chunks): |
||||
print("\n" + "-" * 80) |
||||
print(f"Chunk {i+1}:") |
||||
print("-" * 80) |
||||
print(chunk["summary"]) |
||||
|
||||
# Display tags |
||||
if "tags" in chunk and chunk["tags"]: |
||||
print("\nTags:", ", ".join(chunk["tags"])) |
||||
|
||||
# Display references |
||||
if "references" in chunk and chunk["references"]: |
||||
print("\nReferences:") |
||||
for ref in chunk["references"]: |
||||
print(f"- {ref}") |
||||
|
||||
print("\n" + "=" * 80) |
||||
|
||||
# Provide links to web views |
||||
print("\nView in browser:") |
||||
print("- HTML view: http://localhost:8100/html_results") |
||||
print("- JSON view: http://localhost:8100/view_results") |
||||
|
||||
|
||||
def check_server_status(): |
||||
"""Check if the LLM server is running.""" |
||||
try: |
||||
requests.get("http://localhost:8100/latest_result", timeout=2) |
||||
return True |
||||
except requests.RequestException: |
||||
return False |
||||
|
||||
|
||||
def main(): |
||||
parser = argparse.ArgumentParser(description='Test LLM server and view results') |
||||
parser.add_argument('--wait', type=int, default=5, help='Seconds to wait between polling attempts') |
||||
parser.add_argument('--retries', type=int, default=20, help='Maximum number of polling attempts') |
||||
args = parser.parse_args() |
||||
|
||||
print("LLM Server Test and View") |
||||
print("======================\n") |
||||
|
||||
# Check if server is running |
||||
if not check_server_status(): |
||||
print("ERROR: Cannot connect to LLM server at http://localhost:8100") |
||||
print("Make sure the server is running before continuing.") |
||||
sys.exit(1) |
||||
|
||||
print("✓ Server is running\n") |
||||
|
||||
# Send test document |
||||
doc_info = send_test_document() |
||||
if not doc_info: |
||||
print("Failed to send test document") |
||||
sys.exit(1) |
||||
|
||||
print("\n⏳ Processing document...") |
||||
print("(This may take some time depending on model size and document complexity)") |
||||
|
||||
# Poll for results |
||||
result = poll_for_results(doc_info, max_retries=args.retries, wait_time=args.wait) |
||||
|
||||
# Display results |
||||
display_results(result) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
main() |
||||
@ -1,51 +0,0 @@ |
||||
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub |
||||
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface |
||||
from fairseq import utils |
||||
import nltk |
||||
import torch |
||||
|
||||
# Download the required NLTK resource |
||||
nltk.download('averaged_perceptron_tagger') |
||||
|
||||
# Model loading |
||||
models, cfg, task = load_model_ensemble_and_task_from_hf_hub( |
||||
"facebook/fastspeech2-en-ljspeech", |
||||
arg_overrides={"vocoder": "hifigan", "fp16": False} |
||||
) |
||||
|
||||
# Set device |
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
||||
|
||||
# Move all models to the correct device |
||||
for model in models: |
||||
model.to(device) |
||||
|
||||
# Update configuration and build generator after moving models |
||||
TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg) |
||||
generator = task.build_generator(models, cfg) |
||||
|
||||
# Ensure the vocoder is on the correct device |
||||
generator.vocoder.model.to(device) |
||||
|
||||
# Define your text |
||||
text = """Hi there, thanks for having me! My interest in electric cars really started back when I was a teenager...""" |
||||
|
||||
# Convert text to model input |
||||
sample = TTSHubInterface.get_model_input(task, text) |
||||
|
||||
# Recursively move all tensors in sample to the correct device |
||||
sample = utils.move_to_cuda(sample) if torch.cuda.is_available() else sample |
||||
|
||||
|
||||
|
||||
# Generate speech |
||||
wav, rate = TTSHubInterface.get_prediction(task, models[0], generator, sample) |
||||
|
||||
from scipy.io.wavfile import write |
||||
|
||||
# If wav is a tensor, convert it to a NumPy array |
||||
if isinstance(wav, torch.Tensor): |
||||
wav = wav.cpu().numpy() |
||||
|
||||
# Save the audio to a WAV file |
||||
write('output_fair.wav', rate, wav) |
||||
@ -1,91 +0,0 @@ |
||||
import asyncio |
||||
import re |
||||
from pdf_highlighter import Highlighter |
||||
from _chromadb import ChromaDB |
||||
from _llm import LLM |
||||
import ollama |
||||
from colorprinter.print_color import * |
||||
from concurrent.futures import ThreadPoolExecutor |
||||
|
||||
# Wrap the synchronous generate method |
||||
async def async_generate(llm, prompt): |
||||
loop = asyncio.get_event_loop() |
||||
with ThreadPoolExecutor() as pool: |
||||
return await loop.run_in_executor(pool, llm.generate, prompt) |
||||
|
||||
|
||||
# Define the main asynchronous function to highlight the PDFs |
||||
async def highlight_pdf(data): |
||||
# Use the highlight method to highlight the relevant sentences in the PDFs |
||||
highlighted_pdf_buffer = await highlighter.highlight( |
||||
data=data, zero_indexed_pages=True # Pages are zero-based (e.g., 0, 1, 2, ...) |
||||
) |
||||
|
||||
# Save the highlighted PDF to a new file |
||||
with open("highlighted_combined_documents.pdf", "wb") as f: |
||||
f.write(highlighted_pdf_buffer.getbuffer()) |
||||
print_green("PDF highlighting completed successfully!") |
||||
|
||||
|
||||
# Initialize ChromaDB client |
||||
chromadb = ChromaDB() |
||||
|
||||
# Define the query to fetch relevant text snippets and metadata from ChromaDB |
||||
query = "How are climate researchers advocating for change in the society?" |
||||
|
||||
|
||||
# Perform the query on ChromaDB |
||||
result = chromadb.query(query, collection="sci_articles", n_results=5) |
||||
# Use zip to combine the lists into a list of dictionaries |
||||
results = [ |
||||
{"id": id_, "metadata": metadata, "document": document, "distance": distance} |
||||
for id_, metadata, document, distance in zip( |
||||
result["ids"][0], |
||||
result["metadatas"][0], |
||||
result["documents"][0], |
||||
result["distances"][0], |
||||
) |
||||
] |
||||
|
||||
for r in results: |
||||
print_rainbow(r["metadata"]) |
||||
print_yellow(type(r["metadata"]['pages'])) |
||||
# Ask a LLM a question about the text snippets |
||||
llm = LLM(model="small") |
||||
documents_string = "\n\n---\n\n".join(result["documents"][0]) |
||||
answer = llm.generate( |
||||
f'''{query}\nAnswer using the information below.\n\n"""{documents_string}"""\n\n{query}''' |
||||
) |
||||
print_green(answer) |
||||
# Now you want to highlight relevant information in the PDFs to understand what the LLM is using! |
||||
|
||||
# Each result from ChromaDB contains the PDF filename and the pages where the text is found |
||||
data = [] |
||||
for result in results: |
||||
pages = result["metadata"].get("pages") |
||||
try: |
||||
pages = [int(pages)] |
||||
except (TypeError, ValueError): |
||||
# Use re to extract the page numbers separated by commas |
||||
pages = list(map(int, re.findall(r"\d+", pages))) |
||||
|
||||
data.append( |
||||
{ |
||||
"user_input": query, |
||||
"pdf_filename": result["metadata"]["_id"], |
||||
"pages": pages, |
||||
'chunk': result['document'] |
||||
} |
||||
) |
||||
|
||||
# Initialize the Highlighter |
||||
highlighter = Highlighter( |
||||
llm=llm, # Pass the LLM to the Highlighter |
||||
comment=False, # Set to True to add explanatory comments for context |
||||
use_llm=False |
||||
) |
||||
|
||||
|
||||
|
||||
# Run the main function using asyncio |
||||
asyncio.run(highlight_pdf(data)) |
||||
@ -0,0 +1,191 @@ |
||||
import requests |
||||
import json |
||||
import time |
||||
from _arango import ArangoDB # Import ArangoDB client to fetch results |
||||
|
||||
def test_summarize_document(): |
||||
""" |
||||
Test the document summarization functionality of the LLM server by sending a POST request |
||||
to the summarize_document endpoint. |
||||
|
||||
This function creates a sample document, sends it to the LLM server, and then polls for results. |
||||
""" |
||||
print("Testing document summarization...") |
||||
|
||||
# Define server endpoint |
||||
url = "http://localhost:8100/summarise_document" |
||||
|
||||
# Create a sample document |
||||
sample_document = { |
||||
"arango_doc": { |
||||
"text": """ |
||||
The Impact of Climate Change on Coral Reefs |
||||
|
||||
Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. |
||||
Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae, |
||||
leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature |
||||
can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption |
||||
makes it difficult for corals to build their calcium carbonate skeletons. |
||||
|
||||
Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90% |
||||
of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt |
||||
to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts |
||||
for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may |
||||
provide hope for reef preservation. |
||||
""", |
||||
"chunks": [] |
||||
}, |
||||
"arango_db_name": "test_db", |
||||
"arango_id": "articles/test_article", |
||||
"is_sci": True |
||||
} |
||||
|
||||
# Send request to server |
||||
print("Sending document to server for summarization...") |
||||
response = requests.post(url, json=sample_document) |
||||
|
||||
if response.status_code == 200: |
||||
print("Request accepted. Response:", response.json()) |
||||
|
||||
# Save values for checking results later |
||||
return { |
||||
"db_name": sample_document["arango_db_name"], |
||||
"doc_id": sample_document["arango_id"] |
||||
} |
||||
else: |
||||
print(f"Error: {response.status_code}") |
||||
print(response.text) |
||||
return None |
||||
|
||||
def test_summarize_chunks(): |
||||
""" |
||||
Test the chunk summarization functionality directly by creating a sample document with chunks. |
||||
|
||||
In a real application, you'd typically query the results from the database after processing. |
||||
""" |
||||
print("\nTesting chunk summarization example...") |
||||
|
||||
# Sample document with chunks |
||||
sample_document_with_chunks = { |
||||
"arango_doc": { |
||||
"text": "", |
||||
"chunks": [ |
||||
{ |
||||
"text": "Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. Rising sea temperatures have led to increased coral bleaching events.", |
||||
"pages": [1] |
||||
}, |
||||
{ |
||||
"text": "Studies by Smith et al. [1] show that even a 1-2°C increase in water temperature can trigger mass bleaching events. Additionally, ocean acidification makes it difficult for corals to build their calcium carbonate skeletons.", |
||||
"pages": [1, 2] |
||||
} |
||||
] |
||||
}, |
||||
"arango_db_name": "test_db", |
||||
"arango_id": "interviews/test_interview", |
||||
"is_sci": False |
||||
} |
||||
|
||||
url = "http://localhost:8100/summarise_document" |
||||
print("Sending document with chunks for summarization...") |
||||
response = requests.post(url, json=sample_document_with_chunks) |
||||
|
||||
if response.status_code == 200: |
||||
print("Request accepted. Response:", response.json()) |
||||
return { |
||||
"db_name": sample_document_with_chunks["arango_db_name"], |
||||
"doc_id": sample_document_with_chunks["arango_id"] |
||||
} |
||||
else: |
||||
print(f"Error: {response.status_code}") |
||||
print(response.text) |
||||
return None |
||||
|
||||
def poll_for_results(doc_info, max_retries=10, wait_time=5): |
||||
""" |
||||
Poll the ArangoDB database to check if the document has been summarized. |
||||
|
||||
Args: |
||||
doc_info (dict): Dictionary containing db_name and doc_id |
||||
max_retries (int): Maximum number of polling attempts |
||||
wait_time (int): Time to wait between polling attempts (seconds) |
||||
|
||||
Returns: |
||||
dict or None: The document with summaries if available, None otherwise |
||||
""" |
||||
if not doc_info: |
||||
return None |
||||
|
||||
db_name = doc_info["db_name"] |
||||
doc_id = doc_info["doc_id"] |
||||
|
||||
print(f"\nPolling for results in {db_name}/{doc_id}...") |
||||
|
||||
arango = ArangoDB(db_name=db_name) |
||||
|
||||
for attempt in range(max_retries): |
||||
print(f"Attempt {attempt+1}/{max_retries}...") |
||||
|
||||
try: |
||||
# Get the document from ArangoDB |
||||
document = arango.get_document(doc_id) |
||||
|
||||
# Check if the document has been summarized |
||||
if document and "summary" in document: |
||||
print("✓ Document summary found!") |
||||
print("-" * 50) |
||||
print("Document Summary:") |
||||
print("-" * 50) |
||||
print(document["summary"]["text_sum"]) |
||||
print("-" * 50) |
||||
|
||||
# Check if chunks have been summarized |
||||
if "chunks" in document and document["chunks"] and "summary" in document["chunks"][0]: |
||||
print("✓ Chunk summaries found!") |
||||
print("-" * 50) |
||||
print("First Chunk Summary:") |
||||
print("-" * 50) |
||||
print(document["chunks"][0]["summary"]) |
||||
print("-" * 50) |
||||
if len(document["chunks"]) > 1: |
||||
print("Tags:", document["chunks"][0]["tags"]) |
||||
|
||||
return document |
||||
|
||||
# If we haven't found summaries yet, wait and try again |
||||
time.sleep(wait_time) |
||||
|
||||
except Exception as e: |
||||
print(f"Error checking document: {e}") |
||||
time.sleep(wait_time) |
||||
|
||||
print("❌ Summarization not completed after maximum retries.") |
||||
return None |
||||
|
||||
if __name__ == "__main__": |
||||
print("LLM Server Test Script") |
||||
print("=====================\n") |
||||
|
||||
# Test if server is running |
||||
try: |
||||
requests.get("http://localhost:8100") |
||||
print("Server is running at http://localhost:8100\n") |
||||
except requests.exceptions.ConnectionError: |
||||
print("ERROR: Cannot connect to server at http://localhost:8100") |
||||
print("Make sure the server is running before continuing.\n") |
||||
exit(1) |
||||
|
||||
# Run tests and store document info for polling |
||||
doc1_info = test_summarize_document() |
||||
time.sleep(2) # Brief pause between tests |
||||
doc2_info = test_summarize_chunks() |
||||
|
||||
print("\nWaiting for background tasks to complete...") |
||||
print("This may take some time depending on LLM response speed.") |
||||
|
||||
# Poll for results (with longer wait time for the first document which needs to be chunked) |
||||
poll_for_results(doc1_info, max_retries=20, wait_time=6) |
||||
poll_for_results(doc2_info, max_retries=12, wait_time=5) |
||||
|
||||
print("\nTest script completed.") |
||||
print("If you didn't see results, the background tasks might still be processing.") |
||||
print("You can run this script again later to check, or query the database directly.") |
||||
@ -1,38 +0,0 @@ |
||||
import os |
||||
import base64 |
||||
from ollama import Client, ChatResponse |
||||
import env_manager |
||||
from colorprinter.print_color import * |
||||
import httpx |
||||
|
||||
env_manager.set_env() |
||||
|
||||
# Encode the credentials |
||||
auth = httpx.BasicAuth( |
||||
username='lasse', password=os.getenv("LLM_API_PWD_LASSE") |
||||
) |
||||
client = httpx.Client(auth=auth) |
||||
client = Client( |
||||
host="http://localhost:11434", |
||||
headers={ |
||||
"X-Chosen-Backend": "backend_ollama" # Add this header to specify the chosen backend |
||||
}, |
||||
auth=auth |
||||
) |
||||
response = client.chat( |
||||
model=os.getenv("LLM_MODEL"), |
||||
messages=[ |
||||
{ |
||||
"role": "user", |
||||
"content": "Why is the sky blue?", |
||||
}, |
||||
], |
||||
) |
||||
|
||||
# Print the response headers |
||||
|
||||
# Print the chosen backend from the headers |
||||
print("Chosen Backend:", response.headers.get("X-Chosen-Backend")) |
||||
|
||||
# Print the response content |
||||
print(response) |
||||
@ -1,9 +0,0 @@ |
||||
from _llm import LLM |
||||
|
||||
llm = LLM() |
||||
|
||||
image = '/home/lasse/sci/test_image.png' |
||||
image_bytes = open(image, 'rb').read() |
||||
print(type(image_bytes)) |
||||
response = llm.generate('What is this?', images=[image_bytes]) |
||||
print(response) |
||||
@ -1,206 +0,0 @@ |
||||
from _llm import LLM |
||||
from _arango import ArangoDB |
||||
from _chromadb import ChromaDB |
||||
from streamlit_chatbot import Bot |
||||
from pydantic import BaseModel, Field |
||||
from typing import Dict, List, Tuple |
||||
from colorprinter.print_color import * |
||||
from projects_page import Project |
||||
from _base_class import StreamlitBaseClass |
||||
from prompts import get_tools_prompt |
||||
|
||||
class ResearchBase(Bot): |
||||
def __init__(self, username, **args): |
||||
super().__init__(username=username, **args) |
||||
self.llm = LLM() |
||||
self.arango = ArangoDB() |
||||
self.chromadb = ChromaDB() |
||||
self.messages = [] |
||||
|
||||
def start(self): |
||||
self.messages = [{"role": "system", "message": self.llm.system_message}] |
||||
if self.llm.model in ["small", "standard", "vision", "reasoning", "tools"]: |
||||
self.llm.get_model(self.llm.model) |
||||
|
||||
|
||||
class ResearchManager(ResearchBase): |
||||
def __init__(self, username, project=None): |
||||
super().__init__(username=username, project=project) |
||||
self.llm.system_message = "You are an assistant helping a journalist writing a report based on extensive research." |
||||
self.llm.model = "reasoning" |
||||
self.start() |
||||
|
||||
def generate_plan(self, question): |
||||
query = f""" |
||||
A journalist wants to get a report that answers this question: "{question}" |
||||
THIS IS *NOT* A QUESTION YOU CAN ANSWER! Instead, you need to make a plan for how to answer this question. |
||||
Include what type of information you need from what available sources. |
||||
Available sources are: |
||||
- Scientific articles |
||||
- Other articles the journalist has gathered, such as blog posts, news articles, etc. |
||||
- The journalist's own notes. |
||||
- Transcribed interviews (already done, you can't produce new ones). |
||||
All of the above sources are available in a database, but you need to specify exactly what to retrieve. Be as precise as possible. |
||||
As you don't have access to the sources yourself, include steps to retrieve excerpts from the articles and select those that might be interesting. |
||||
Also include steps to verify the information. |
||||
Make the plan easy to follow and structured. |
||||
Remember: You are not answering the question, you are making *a plan* for how to answer the question using the available sources. |
||||
""" |
||||
query += f"\nTo help you understand the subject, here is a summary of the notes the journalist has taken: {self.project.notes_summary}" |
||||
query += """Please structure the plan like: |
||||
## Step 1: |
||||
- Task1: Description of task |
||||
- Task2: Description of task |
||||
## Step 2: |
||||
- Task1: Description of task |
||||
- Task2: Description of task |
||||
Etc, with as many steps and tasks as needed. |
||||
""" |
||||
return self.llm.generate(query).content |
||||
|
||||
|
||||
class ResearchAssistant(ResearchBase): |
||||
def __init__(self, username): |
||||
super().__init__(username) |
||||
self.llm.system_message = "You are a Research Assistant" |
||||
self.start() |
||||
|
||||
|
||||
class HelperBot(ResearchBase): |
||||
def __init__(self, username): |
||||
super().__init__(username) |
||||
self.llm.system_message = "You are helping a researcher to structure a text. You will get a text and turn it into structured data. Make sure not to change the meaning of the text and keep all the details in the subtasks." |
||||
self.llm.model = "small" |
||||
self.start() |
||||
|
||||
def make_structured_plan(self, text, question=None): |
||||
|
||||
class Plan(BaseModel): |
||||
steps: Dict[str, List[Tuple[str, str]]] = Field( |
||||
description="Structured plan represented as steps with their corresponding tasks or facts", |
||||
example={ |
||||
"Step 1: Gather Existing Materials": [ |
||||
("Task 1", "Description of task"), |
||||
("Task 2", "Description of task"), |
||||
], |
||||
"Step 2: Extract Relevant Information": [ |
||||
("Task 1", "Description of task"), |
||||
("Task 2", "Description of task"), |
||||
], |
||||
}, |
||||
) |
||||
|
||||
if question: |
||||
query = f''' This is a proposed plan for how to write a report on "{question}":\n"""{text}"""\nPlease make the plan into structured data with subtasks. Make sure to keep all the details in the subtasks.''' |
||||
else: |
||||
query = f''' This is a proposed plan for how to write a report:\n"""{text}"""\nPlease make the plan into structured data with subtasks. Make sure to keep all the details in the subtasks.''' |
||||
response = self.llm.generate(query, format=Plan.model_json_schema()) |
||||
print(response) |
||||
structured_response = Plan.model_validate_json(response.content) |
||||
print('PLAN') |
||||
print_rainbow(structured_response) |
||||
print() |
||||
return structured_response |
||||
|
||||
|
||||
class ToolBot(ResearchBase): |
||||
def __init__(self, username, tools: list): |
||||
super().__init__(username, tools=tools) |
||||
self.start() |
||||
tools_names = [tool if isinstance(tool, str) else tool.__name__ for tool in self.tools]  # tools may be passed as names (strings) or callables |
||||
tools_name_string = "– " + "\n– ".join(tools_names) |
||||
self.llm = LLM( |
||||
temperature=0, |
||||
system_message=f""" |
||||
You are a helpful assistant with tools. The tools you can choose from are: |
||||
{tools_name_string} |
||||
Your task is to choose one or more tools to answer a user's query. |
||||
DON'T come up with your own tools, only use the ones provided. |
||||
""", |
||||
chat=False, |
||||
model="tools", |
||||
) |
||||
|
||||
def propose_tools(self, task): |
||||
query = f"""What tool(s) would you use to help with this task: |
||||
"{task}" |
||||
Answer in a structured way using the tool_calls field! |
||||
""" |
||||
query = get_tools_prompt(task)  # the shared prompt template supersedes the inline query built above |
||||
response = self.llm.generate(query) |
||||
print_yellow('Model:', self.llm.model) |
||||
print_rainbow(response) |
||||
return response.tool_calls |
||||
|
||||
if __name__ == "__main__": |
||||
|
||||
base = StreamlitBaseClass(username="lasse") |
||||
project = Project( |
||||
username="lasse", |
||||
project_name="Monarch butterflies", |
||||
user_arango=base.get_arango(), |
||||
) |
||||
rm = ResearchManager(username="lasse", project=project) |
||||
tb = ToolBot( |
||||
username="lasse", |
||||
tools=[ |
||||
"fetch_science_articles_tool", |
||||
"fetch_notes_tool", |
||||
"fetch_other_documents_tool", |
||||
"fetch_science_articles_and_other_documents_tool", |
||||
] |
||||
) |
||||
# ra = ResearchAssistant(username="lasse") |
||||
hb = HelperBot(username="lasse") |
||||
|
||||
question = "Tell me five interesting facts about the Monarch butterfly" |
||||
|
||||
# Generate plan |
||||
plan = rm.generate_plan(question) |
||||
# -- Example of what a plan can look like -- |
||||
# plan = """## Step-by-Step Plan for Answering the Question: "Tell Me Five Interesting Facts About the Monarch Butterfly" |
||||
|
||||
# ### Step 1: Gather and Organize Existing Materials |
||||
# - **Task 1:** Retrieve all existing materials related to Monarch butterflies from the database using keywords such as "Monarch butterfly migration," "habitat loss," "milkweed," "insecticides," "climate change," "Monarch Butterfly Biosphere Reserve," and "migration patterns." |
||||
# - **Task 2:** Categorize these materials into scientific articles, other articles (blogs, news), own notes, and transcribed interviews for easy access. |
||||
|
||||
# ### Step 2: Extract Relevant Excerpts |
||||
# - **Task 1:** From the retrieved scientific articles, extract information on migration patterns, genetic studies, and population decline factors. |
||||
# - **Task 2:** From blogs and news articles, look for interesting anecdotes or recent findings about conservation efforts and unique behaviors of Monarch butterflies. |
||||
|
||||
# ### Step 3: Identify Potential Interesting Facts |
||||
# - **Task 1:** Review the extracted excerpts to identify potential facts such as migration patterns, threats faced by Monarchs, population decline statistics, conservation efforts, and unique behaviors. |
||||
# - **Task 2:** Compile a list of five compelling and accurate facts based on the extracted information. |
||||
|
||||
# ### Step 4: Verify Information |
||||
# - **Task 1:** Cross-check each fact with multiple sources to ensure accuracy. For example, verify migration details across scientific articles and recent news reports. |
||||
# - **Task 2:** Look for consensus among sources regarding population trends and threats to Monarchs. |
||||
|
||||
# ### Step 5: Structure the Report |
||||
# - **Task 1:** Organize the five selected facts into a coherent structure, ensuring each fact is clearly explained and engaging. |
||||
# - **Task 2:** Incorporate quotes or statistics from sources to add depth and credibility to each fact. |
||||
|
||||
# ### Step 6: Review and Finalize |
||||
# - **Task 1:** Proofread the report for clarity, accuracy, and grammar. |
||||
# - **Task 2:** Ensure all information is presented in an engaging manner suitable for a journalistic report. |
||||
|
||||
# This plan ensures that the journalist systematically gathers, verifies, and presents five interesting facts about Monarch butterflies, providing a comprehensive and accurate report. |
||||
# """ |
||||
#print_blue(plan) |
||||
if "</think>" in plan: |
||||
plan = plan.split("</think>")[1] |
||||
|
||||
# Make structured plan |
||||
structured_plan = hb.make_structured_plan(plan, question) |
||||
|
||||
|
||||
for step, tasks in structured_plan.steps.items(): |
||||
print_blue("\n### Step:", step) |
||||
for task in tasks: |
||||
|
||||
print_blue("Task:", task[0]) |
||||
print_yellow(task[1]) |
||||
|
||||
tools = tb.propose_tools(task[1]) |
||||
print_green("Tools:", tools) |
||||
print('\n') |
||||
@ -0,0 +1,123 @@ |
||||
import requests |
||||
import json |
||||
import time |
||||
|
||||
def test_summarize_document(): |
||||
""" |
||||
Test the document summarization functionality of the LLM server by sending a POST request |
||||
to the summarise_document endpoint. |
||||
|
||||
This function creates a sample document, sends it to the LLM server, and reports whether the request was accepted. |
||||
""" |
||||
print("Testing document summarization...") |
||||
|
||||
# Define server endpoint |
||||
url = "http://localhost:8100/summarise_document" |
||||
|
||||
# Create a sample document |
||||
sample_document = { |
||||
"arango_doc": { |
||||
"text": """ |
||||
The Impact of Climate Change on Coral Reefs |
||||
|
||||
Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. |
||||
Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae, |
||||
leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature |
||||
can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption |
||||
makes it difficult for corals to build their calcium carbonate skeletons. |
||||
|
||||
Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90% |
||||
of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt |
||||
to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts |
||||
for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may |
||||
provide hope for reef preservation. |
||||
""", |
||||
"chunks": [] |
||||
}, |
||||
"arango_db_name": "test_db", |
||||
"arango_id": "articles/test_article", |
||||
"is_sci": True |
||||
} |
||||
|
||||
# Send request to server |
||||
print("Sending document to server for summarization...") |
||||
response = requests.post(url, json=sample_document) |
||||
|
||||
if response.status_code == 200: |
||||
print("Request accepted. Response:", response.json()) |
||||
|
||||
# In a real-world scenario, you might poll the database to see when the summary is ready |
||||
print("Note: In a real implementation, you would check the database for results.") |
||||
print("Since this is just a test, we're showing how the request works.") |
||||
|
||||
return True |
||||
else: |
||||
print(f"Error: {response.status_code}") |
||||
print(response.text) |
||||
return False |
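| 
# Minimal polling sketch (an assumption, not exercised by these tests): with |
# python-arango installed you could wait for the background task to finish and |
# read the stored summary back. The host, credentials and collection names |
# below are placeholders for your own setup. |
def poll_for_summary(db_name="test_db", collection="articles", key="test_article", retries=10, wait_time=5): |
    from arango import ArangoClient  # pip install python-arango |
    db = ArangoClient(hosts="http://localhost:8529").db(db_name, username="root", password="changeme") |
    for _ in range(retries): |
        doc = db.collection(collection).get(key) |
        if doc and "summary" in doc:  # the summary field is attached once processing completes |
            return doc["summary"] |
        time.sleep(wait_time) |
    return None |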
||||
|
||||
def test_summarize_chunks(): |
||||
""" |
||||
Test the chunk summarization functionality directly by creating a sample document with chunks. |
||||
|
||||
In a real application, you'd typically query the results from the database after processing. |
||||
""" |
||||
print("\nTesting chunk summarization example...") |
||||
|
||||
# Sample document with chunks |
||||
sample_document_with_chunks = { |
||||
"arango_doc": { |
||||
"text": "", |
||||
"chunks": [ |
||||
{ |
||||
"text": "Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. Rising sea temperatures have led to increased coral bleaching events.", |
||||
"pages": [1] |
||||
}, |
||||
{ |
||||
"text": "Studies by Smith et al. [1] show that even a 1-2°C increase in water temperature can trigger mass bleaching events. Additionally, ocean acidification makes it difficult for corals to build their calcium carbonate skeletons.", |
||||
"pages": [1, 2] |
||||
} |
||||
] |
||||
}, |
||||
"arango_db_name": "test_db", |
||||
"arango_id": "interviews/test_interview", |
||||
"is_sci": False |
||||
} |
||||
|
||||
# In a real implementation, you would: |
||||
# 1. Send this document to the server |
||||
# 2. Check the database later to see the summarized chunks |
||||
|
||||
url = "http://localhost:8100/summarise_document" |
||||
print("Sending document with chunks for summarization...") |
||||
response = requests.post(url, json=sample_document_with_chunks) |
||||
|
||||
if response.status_code == 200: |
||||
print("Request accepted. Response:", response.json()) |
||||
return True |
||||
else: |
||||
print(f"Error: {response.status_code}") |
||||
print(response.text) |
||||
return False |
||||
|
||||
if __name__ == "__main__": |
||||
print("LLM Server Test Script") |
||||
print("=====================\n") |
||||
|
||||
# Test if server is running |
||||
try: |
||||
requests.get("http://localhost:8100") |
||||
print("Server is running at http://localhost:8100\n") |
||||
except requests.exceptions.ConnectionError: |
||||
print("ERROR: Cannot connect to server at http://localhost:8100") |
||||
print("Make sure the server is running before continuing.\n") |
||||
exit(1) |
||||
|
||||
# Run tests |
||||
test_summarize_document() |
||||
time.sleep(2) # Brief pause between tests |
||||
test_summarize_chunks() |
||||
|
||||
print("\nTest script completed. Check your ArangoDB instance for results.") |
||||
print("Note: Document summarization happens in background tasks, so results may not be immediate.") |
||||
print("You would typically query the database to see the updated documents with summaries.") |
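| 
# Rough shape of an updated document once summarisation has finished (inferred |
# from view_latest_results.py, not an exact schema): a top-level "summary" with |
# the text and model metadata, plus per-chunk summaries, tags and references. |
# { |
#     "_id": "articles/test_article", |
#     "summary": {"text_sum": "...", "meta": {"model": "...", "temperature": ...}}, |
#     "chunks": [ |
#         {"text": "...", "pages": [1], "summary": "...", "tags": ["..."], "references": ["..."]} |
#     ], |
# } |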
||||
@ -1,45 +0,0 @@ |
||||
import torch |
||||
from TTS.api import TTS |
||||
from datetime import datetime |
||||
# Get device |
||||
from TTS.tts.utils.speakers import SpeakerManager |
||||
device = "cuda" if torch.cuda.is_available() else "cpu" |
||||
|
||||
|
||||
# Init TTS |
||||
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) |
||||
|
||||
|
||||
# exit()  # early exit disabled so that the synthesis below actually runs |
||||
|
||||
|
||||
|
||||
|
||||
text = """Hi there, thanks for having me! My interest in electric cars really started back when I was a teenager. I remember learning about the history of EVs and how they've been around since the late 1800s, even before gasoline cars took over. The fact that these vehicles could run on electricity instead of fossil fuels just fascinated me. |
||||
|
||||
Then, in the 90s, General Motors introduced the EV1 - it was a real game-changer. It showed that electric cars could be practical and enjoyable to drive. And when Tesla came along with their Roadster in 2007, proving that EVs could have a long range, I was hooked. |
||||
|
||||
But what really sealed my interest was learning about the environmental impact of EVs. They produce zero tailpipe emissions, which means they can help reduce air pollution and greenhouse gas emissions. That's something I'm really passionate about. |
||||
""" |
||||
text_se = """Antalet bilar ger dock bara en del av bilden. För att förstå bilberoendet bör vi framför allt titta på hur mycket bilarna faktiskt används. |
||||
Stockholmarnas genomsnittliga körsträcka med bil har minskat sedan millennieskiftet. Den är dock lägre i Göteborg och i Malmö. |
||||
I procent har bilanvändningen sedan år 2000 minskat lika mycket i Stockholm och Malmö, 9 procent. I Göteborg är minskningen 13 procent, i riket är minskningen 7 procent.""" |
||||
# Run TTS |
||||
# ❗ Since this is a multilingual voice-cloning model, we must set the target speaker_wav and language |
||||
# Text to speech, returning a list of amplitude values as output |
||||
#wav = tts.tts(text=text, speaker_wav="my/cloning/audio.wav", language="en") |
||||
# Text to speech to a file |
||||
time_now = datetime.now().strftime("%Y%m%d%H%M%S") |
||||
output_path = f"output/tts_{time_now}.wav" |
||||
tts.tts_to_file(text=text, speaker_wav='voices/test/test_en.wav', language="en", file_path=output_path) |
||||
|
||||
|
||||
|
||||
|
||||
# api = TTS("tts_models/se/fairseq/vits") |
||||
|
||||
# api.tts_with_vc_to_file( |
||||
# text_se, |
||||
# speaker_wav="test_audio_se.wav", |
||||
# file_path="output_se.wav" |
||||
# ) |
||||
@ -1,22 +0,0 @@ |
||||
import requests |
||||
|
||||
# Define the server URL |
||||
server_url = "http://localhost:5002/api/tts" |
||||
|
||||
# Define the payload |
||||
payload = { |
||||
"text": "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", |
||||
"speaker": "Ana Florence", |
||||
"language": "en", |
||||
"split_sentences": True |
||||
} |
||||
|
||||
# Send the request to the TTS server |
||||
response = requests.post(server_url, json=payload) |
||||
|
||||
# Save the response audio to a file |
||||
if response.status_code == 200: |
||||
with open("output.wav", "wb") as f: |
||||
f.write(response.content) |
||||
else: |
||||
print(f"Error: {response.status_code}") |
||||
@ -1,33 +0,0 @@ |
||||
from TTS.tts.configs.tortoise_config import TortoiseConfig |
||||
from TTS.tts.models.tortoise import Tortoise |
||||
import torch |
||||
import os |
||||
import torchaudio |
||||
|
||||
# Initialize Tortoise model |
||||
config = TortoiseConfig() |
||||
model = Tortoise.init_from_config(config) |
||||
model.load_checkpoint(config, checkpoint_dir="tts_models/en/multi-dataset/tortoise-v2", eval=True) |
||||
|
||||
# Move model to GPU if available |
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
||||
print(device) |
||||
model.to(device) |
||||
|
||||
# Define the text and voice directory |
||||
text = "There is, therefore, an increasing need to understand BEVs from a systems perspective." |
||||
voice_dir = "voices" |
||||
speaker = "test" |
||||
|
||||
# Load voice samples |
||||
voice_samples = [] |
||||
for file_name in os.listdir(os.path.join(voice_dir, speaker)): |
||||
file_path = os.path.join(voice_dir, speaker, file_name) |
||||
waveform, sample_rate = torchaudio.load(file_path) |
||||
voice_samples.append(waveform) |
||||
|
||||
# Get conditioning latents |
||||
conditioning_latents = model.get_conditioning_latents(voice_samples) |
||||
|
||||
# Save conditioning latents to a file |
||||
torch.save(conditioning_latents, "conditioning_latents.pth") |
||||
@ -0,0 +1,111 @@ |
||||
#!/usr/bin/env python3 |
||||
""" |
||||
View Latest LLM Server Results |
||||
|
||||
This script displays the latest document summaries generated by the LLM server |
||||
directly in the terminal, providing a quick way to check results without |
||||
having to use a web browser. |
||||
|
||||
Usage: |
||||
python view_latest_results.py [--raw] [--json] |
||||
|
||||
Options: |
||||
--raw Display the raw result data |
||||
--json Format the output as JSON |
||||
""" |
||||
|
||||
import json |
||||
import os |
||||
import sys |
||||
import argparse |
||||
from datetime import datetime |
||||
|
||||
|
||||
def load_latest_result(): |
||||
"""Load the latest result from the JSON file.""" |
||||
latest_result_file = os.path.join(os.path.dirname(__file__), "latest_summary_result.json") |
||||
try: |
||||
if os.path.exists(latest_result_file): |
||||
with open(latest_result_file, 'r') as f: |
||||
return json.load(f) |
||||
else: |
||||
print(f"No results file found at {latest_result_file}") |
||||
return None |
||||
except Exception as e: |
||||
print(f"Error loading results: {e}") |
||||
return None |
||||
|
||||
|
||||
def display_raw(result): |
||||
"""Display the raw result data.""" |
||||
print(json.dumps(result, indent=2)) |
||||
|
||||
|
||||
def display_formatted(result): |
||||
"""Display the result in a nicely formatted way.""" |
||||
if not result: |
||||
print("No results available") |
||||
return |
||||
|
||||
print("\n" + "=" * 80) |
||||
print(f"DOCUMENT: {result.get('_id', 'Unknown')}") |
||||
print("=" * 80) |
||||
|
||||
# Document summary |
||||
summary = result.get("summary", {}).get("text_sum", "No summary available") |
||||
print("\n📄 DOCUMENT SUMMARY") |
||||
print("-" * 80) |
||||
print(summary) |
||||
|
||||
# Model info if available |
||||
if "summary" in result and "meta" in result["summary"]: |
||||
meta = result["summary"]["meta"] |
||||
model = meta.get("model", "Unknown") |
||||
temp = meta.get("temperature", "Unknown") |
||||
print(f"\nGenerated using: {model} (temperature: {temp})") |
||||
|
||||
# Display chunks |
||||
chunks = result.get("chunks", []) |
||||
if chunks: |
||||
summarized_chunks = [chunk for chunk in chunks if "summary" in chunk] |
||||
print(f"\n🧩 CHUNK SUMMARIES ({len(summarized_chunks)}/{len(chunks)} chunks processed)") |
||||
|
||||
for i, chunk in enumerate(summarized_chunks): |
||||
print("\n" + "-" * 80) |
||||
print(f"Chunk {i+1}:") |
||||
print("-" * 80) |
||||
print(chunk["summary"]) |
||||
|
||||
# Display tags |
||||
if "tags" in chunk and chunk["tags"]: |
||||
print("\nTags:", ", ".join(chunk["tags"])) |
||||
|
||||
# Display references |
||||
if "references" in chunk and chunk["references"]: |
||||
print("\nReferences:") |
||||
for ref in chunk["references"]: |
||||
print(f"- {ref}") |
||||
|
||||
print("\n" + "=" * 80) |
||||
|
||||
|
||||
def main(): |
||||
parser = argparse.ArgumentParser(description='View latest LLM server results') |
||||
parser.add_argument('--raw', action='store_true', help='Display raw result data') |
||||
parser.add_argument('--json', action='store_true', help='Format output as JSON') |
||||
args = parser.parse_args() |
||||
|
||||
result = load_latest_result() |
||||
|
||||
if not result: |
||||
print("No results available") |
||||
return |
||||
|
||||
if args.raw or args.json: |
||||
display_raw(result) |
||||
else: |
||||
display_formatted(result) |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
main() |
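| 
# Example invocations (matching the argparse flags above): |
#   python view_latest_results.py          # formatted view |
#   python view_latest_results.py --json   # raw JSON output |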
||||