Refactor article processing and enhance project management features

main
lasseedfast 9 months ago
parent 732793b79f
commit 83044a905b
1. _arango.py (12)
2. _base_class.py (9)
3. _bots.py (800)
4. _classes.py (48)
5. _llm.py (213)
6. article2db.py (2)
7. bluesky_bot.py (123)
8. projects_page.py (76)
9. prompts.py (10)
10. streamlit_chatbot.py (479)
11. streamlit_pages.py (4)

@ -21,16 +21,22 @@ class ArangoDB:
"""
host = os.getenv("ARANGO_HOST")
if not user:
user = os.getenv("ARANGO_USER")
if not password:
password = os.getenv("ARANGO_PASSWORD")
if not db_name:
db_name = os.getenv("ARANGO_DB")
if user:
db_name = user
else:
db_name = os.getenv("ARANGO_DB")
if not user:
user = os.getenv("ARANGO_USER")
self.client = ArangoClient(hosts=host)
if user == 'lasse':  #! This needs to be fixed to work with all users!
password = os.getenv("ARANGO_PWD_LASSE")
self.db = self.client.db(db_name, username=user, password=password)
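# Illustrative usage (a sketch; assumes the ARANGO_* env vars are set):
# ArangoDB() connects with the environment defaults, while
# ArangoDB(user="alice") connects to the per-user database "alice".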
def fix_key(self, _key):
"""
Sanitize a given key by replacing all characters that are not alphanumeric,

@ -22,7 +22,9 @@ class BaseClass:
elif admin:
return ArangoDB()
else:
return ArangoDB(user=self.username)
from colorprinter.print_color import print_yellow
print_yellow(f"User: {self.username}")
return ArangoDB(user=self.username, db_name=self.username)
def get_article_collections(self) -> list:
article_collections = self.user_arango.db.aql.execute(
@ -102,6 +104,9 @@ class StreamlitBaseClass(BaseClass):
)
st.session_state["settings"][key] = value
def get_settings(self):
return self.user_arango.db.document("settings/settings")
def update_session_state(self, page_name=None):
@ -149,10 +154,12 @@ class StreamlitBaseClass(BaseClass):
projects = self.get_projects()
project = st.selectbox(text, projects, index=None)
print('Choosing project...')
if project:
from _classes import Project
self.project = Project(self.username, project, self.user_arango)
self.collection = None
self.update_settings("current_project", self.project.name)
self.update_session_state()
print('CHOSEN PROJECT:', self.project.name)
return self.project

@ -0,0 +1,800 @@
from datetime import datetime
import streamlit as st
from _base_class import StreamlitBaseClass, BaseClass
from _llm import LLM
from prompts import *
from colorprinter.print_color import *
from llm_tools import ToolRegistry
class Chat(StreamlitBaseClass):
def __init__(self, username=None, **kwargs):
super().__init__(username=username, **kwargs)
self.name = kwargs.get("name", None)
self.chat_history = kwargs.get("chat_history", [])
def add_message(self, role, content):
self.chat_history.append(
{
"role": role,
"content": content.strip().strip('"'),
"role_type": self.role,
}
)
def to_dict(self):
return {
"_key": self._key,
"name": self.name,
"chat_history": self.chat_history,
"role": self.role,
"username": self.username,
}
def update_in_arango(self):
self.last_updated = datetime.now().isoformat()
self.user_arango.db.collection("chats").insert(
self.to_dict(), overwrite=True, overwrite_mode="update"
)
def set_name(self, user_input):
llm = LLM(
model="small",
max_length_answer=50,
temperature=0.4,
system_message="You are a chatbot who will be chatting with a user",
)
prompt = (
f'Give a short name to the chat based on this user input: "{user_input}" '
"No more than 30 characters. Answer ONLY with the name of the chat."
)
name = llm.generate(prompt).content.strip('"')
name = f'{name} - {datetime.now().strftime("%B %d")}'
existing_chat = self.user_arango.db.aql.execute(
f'FOR doc IN chats FILTER doc.name == "{name}" RETURN doc', count=True
)
if existing_chat.count() > 0:
name = f'{name} ({datetime.now().strftime("%H:%M")})'
name += f" - [{self.role}]"
self.name = name
return name
@classmethod
def from_dict(cls, data):
return cls(
username=data.get("username"),
name=data.get("name"),
chat_history=data.get("chat_history", []),
role=data.get("role", "Research Assistant"),
_key=data.get("_key"),
)
def chat_history2bot(self, n_messages: int = None, remove_system: bool = False):
history = [
{"role": m["role"], "content": m["content"]} for m in self.chat_history
]
if n_messages and len(history) > n_messages:
history = history[-n_messages:]
if history and (
(history[0]["role"] == "system" and remove_system)
or history[0]["role"] == "assistant"
):
history = history[1:]
return history
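# Illustrative: chat_history entries also carry a "role_type" key; the method
# above reduces them to the {"role": ..., "content": ...} shape the LLM client
# expects, optionally keeping only the last n_messages.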
class Bot(BaseClass):
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
super().__init__(username=username, **kwargs)
# Use the passed in chat or create a new Chat
self.chat = chat if chat else Chat(username=username, role="Research Assistant")
print_yellow(f"Chat:", chat, type(chat))
# Store or set up project/collection if available
self.project = kwargs.get("project", None)
self.collection = kwargs.get("collection", None)
if self.collection and not isinstance(self.collection, list):
self.collection = [self.collection]
# Load articles in the collections
self.arango_ids = []
if self.collection:
for c in self.collection:
for _id in self.user_arango.db.aql.execute(
"""
FOR doc IN article_collections
FILTER doc.name == @collection
FOR article IN doc.articles
RETURN article._id
""",
bind_vars={"collection": c},
):
self.arango_ids.append(_id)
# A standard LLM for normal chat
self.chatbot = LLM(messages=self.chat.chat_history2bot())
# A helper bot for generating queries or short prompts
self.helperbot = LLM(
temperature=0,
model="small",
max_length_answer=500,
system_message=get_query_builder_system_message(),
messages=self.chat.chat_history2bot(n_messages=4, remove_system=True),
)
# A specialized LLM picking which tool to use
self.toolbot = LLM(
temperature=0,
system_message="""
You are an assistant bot helping an answering bot to answer a user's messages.
Your task is to choose one or multiple tools that will help the answering bot to provide the user with the best possible answer.
You should NEVER directly answer the user. You MUST choose a tool.
""",
chat=False,
model="small",
)
# Load or register the passed-in tools
if tools:
self.tools = ToolRegistry.get_tools(tools=tools)
else:
self.tools = ToolRegistry.get_tools()
# Store other kwargs
for arg in kwargs:
setattr(self, arg, kwargs[arg])
def get_chunks(
self,
user_input,
collections=["sci_articles", "other_documents"],
n_results=7,
n_sources=4,
filter=True,
):
# Basic version without Streamlit calls
query = self.helperbot.generate(
get_generate_vector_query_prompt(user_input, self.chat.role)
).content.strip('"')
combined_chunks = []
if collections:
for collection in collections:
where_filter = {"_id": {"$in": self.arango_ids}} if filter else {}
chunks = self.get_chromadb().query(
query=query,
collection=collection,
n_results=n_results,
n_sources=n_sources,
where=where_filter,
max_retries=3,
)
for doc, meta, dist in zip(
chunks["documents"][0],
chunks["metadatas"][0],
chunks["distances"][0],
):
combined_chunks.append(
{"document": doc, "metadata": meta, "distance": dist}
)
combined_chunks.sort(key=lambda x: x["distance"])
# Keep the best chunks according to n_sources
sources = set()
closest_chunks = []
for chunk in combined_chunks:
source_id = chunk["metadata"].get("_id", "no_id")
if source_id not in sources:
sources.add(source_id)
closest_chunks.append(chunk)
if len(sources) >= n_sources:
break
if len(closest_chunks) < n_results:
remaining_chunks = [
c for c in combined_chunks if c not in closest_chunks
]
closest_chunks.extend(remaining_chunks[: n_results - len(closest_chunks)])
# Now fetch real metadata from Arango
for chunk in closest_chunks:
_id = chunk["metadata"].get("_id")
if not _id:
continue
if _id.startswith("sci_articles"):
arango_doc = self.base_arango.db.document(_id)
else:
arango_doc = self.user_arango.db.document(_id)
if arango_doc:
arango_metadata = arango_doc.get("metadata", {})
# Possibly merge notes
if "user_notes" in arango_doc:
arango_metadata["user_notes"] = arango_doc["user_notes"]
chunk["metadata"] = arango_metadata
# Group by article title
grouped_chunks = {}
article_number = 1
for chunk in closest_chunks:
title = chunk["metadata"].get("title", "No title")
chunk["article_number"] = article_number
if title not in grouped_chunks:
grouped_chunks[title] = {
"article_number": article_number,
"chunks": [],
}
article_number += 1
grouped_chunks[title]["chunks"].append(chunk)
return grouped_chunks
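# Illustrative shape of the returned mapping (values are examples only):
# {
#     "Some article title": {
#         "article_number": 1,
#         "chunks": [
#             {"document": "...", "metadata": {...}, "distance": 0.12, "article_number": 1},
#         ],
#     },
# }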
def answer_tool_call(self, response, user_input):
bot_responses = []
# This method returns / stores responses (no Streamlit calls)
if not response.get("tool_calls"):
return ""
for tool in response.get("tool_calls"):
function_name = tool.function.get('name')
arguments = tool.function.arguments
arguments["query"] = user_input
if hasattr(self, function_name):
if function_name in [
"fetch_other_documents_tool",
"fetch_science_articles_tool",
"fetch_science_articles_and_other_documents_tool",
]:
chunks = getattr(self, function_name)(**arguments)
bot_responses.append(
self.generate_from_chunks(user_input, chunks).strip('"')
)
elif function_name == "fetch_notes_tool":
notes = getattr(self, function_name)()
bot_responses.append(
self.generate_from_notes(user_input, notes).strip('"')
)
elif function_name == "conversational_response_tool":
bot_responses.append(
getattr(self, function_name)(user_input).strip('"')
)
return "\n\n".join(bot_responses)
def process_user_input(self, user_input, content_attachment=None):
# Add user message
self.chat.add_message("user", user_input)
if not content_attachment:
prompt = get_tools_prompt(user_input)
response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
if response.get("tool_calls"):
bot_response = self.answer_tool_call(response, user_input)
else:
# Just respond directly
bot_response = response.content.strip('"')
else:
# If there's an attachment, do something minimal
bot_response = "Content attachment received (Base Bot)."
# Add assistant message
if self.chat.chat_history[-1]["role"] != "assistant":
self.chat.add_message("assistant", bot_response)
# Update in Arango
self.chat.update_in_arango()
return bot_response
def generate_from_notes(self, user_input, notes):
# No Streamlit calls
notes_string = ""
for note in notes:
notes_string += f"\n# {note.get('title','No title')}\n{note.get('content','')}\n---\n"
prompt = get_chat_prompt(user_input, content_string=notes_string, role=self.chat.role)
return self.chatbot.generate(prompt, stream=True)
def generate_from_chunks(self, user_input, chunks):
# No Streamlit calls
chunks_string = ""
for title, group in chunks.items():
user_notes_string = ""
if "user_notes" in group["chunks"][0]["metadata"]:
notes = group["chunks"][0]["metadata"]["user_notes"]
user_notes_string = f'\n\nUser notes:\n"""\n{notes}\n"""\n\n'
docs = "\n(...)\n".join([c["document"] for c in group["chunks"]])
chunks_string += (
f"\n# {title}\n## Article #{group['article_number']}\n{user_notes_string}{docs}\n---\n"
)
prompt = get_chat_prompt(user_input, content_string=chunks_string, role=self.chat.role)
return self.chatbot.generate(prompt, stream=True)
def run(self):
# Base Bot has no Streamlit run loop
pass
def get_notes(self):
# Minimal note retrieval
notes = self.user_arango.db.aql.execute(
f'FOR doc IN notes FILTER doc.project == "{self.project.name if self.project else ""}" RETURN doc'
)
return list(notes)
@ToolRegistry.register
def fetch_science_articles_tool(self, query: str, n_documents: int):
"""
"Fetches information from scientific articles. Use this tool when the user is looking for information from scientific articles."
Parameters:
query (str): The search query to find relevant scientific articles.
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 3, Max: 10.
Returns:
list: A list of chunks containing information from the fetched scientific articles.
"""
print_purple('Query:', query)
n_documents = int(n_documents)
if n_documents < 3:
n_documents = 3
elif n_documents > 10:
n_documents = 10
return self.get_chunks(
query, collections=["sci_articles"], n_results=n_documents
)
@ToolRegistry.register
def fetch_other_documents_tool(self, query: str, n_documents: int):
"""
Fetches information from other documents based on the user's query.
This method retrieves information from various types of documents such as reports, news articles, and other texts. It should be used only when it is clear that the user is not seeking scientific articles.
Args:
query (str): The search query provided by the user.
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 2, Max: 10.
Returns:
list: A list of document chunks that match the query.
"""
assert isinstance(self, Bot), "The first argument must be a Bot object."
n_documents = int(n_documents)
if n_documents < 2:
n_documents = 2
elif n_documents > 10:
n_documents = 10
return self.get_chunks(
query,
collections=[f"{self.username}__other_documents"],
n_results=n_documents,
)
@ToolRegistry.register
def fetch_science_articles_and_other_documents_tool(
self, query: str, n_documents: int
):
"""
Fetches information from both scientific articles and other documents.
This method is often used when the user hasn't specified what kind of sources they are interested in.
Args:
query (str): The search query to fetch information for.
n_documents (int): How many documents to fetch. A complex query may require more documents. Min: 3, Max: 10.
Returns:
list: A list of document chunks that match the search query.
"""
assert isinstance(self, Bot), "The first argument must be a Bot object."
n_documents = int(n_documents)
if n_documents < 3:
n_documents = 3
elif n_documents > 10:
n_documents = 10
return self.get_chunks(
query,
collections=["sci_articles", f"{self.username}__other_documents"],
n_results=n_documents,
)
@ToolRegistry.register
def fetch_notes_tool(bot):
"""
Fetches information from the project notes when you as an editor need context from the project notes to understand other information. ONLY use this together with other tools! No arguments needed.
Returns:
list: A list of notes.
"""
assert isinstance(bot, Bot), "The first argument must be a Bot object."
return bot.get_notes()
@ToolRegistry.register
def conversational_response_tool(self, query: str):
"""
Generate a conversational response to a user's query.
This method is designed to provide a short and conversational response
without fetching additional data. It should be used only when it is clear
that the user is engaging in small talk (like saying 'hi') and not seeking detailed information.
Args:
query (str): The user's message to which the bot should respond.
Returns:
str: The generated conversational response.
"""
query = f"""
User message: "{query}".
Make your answer short and conversational.
This is perhaps not a conversation about a journalistic project, so try not to be too informative.
Don't answer with anything you're not sure of!
"""
result = (
self.chatbot.generate(query, stream=True)
if self.chatbot
else self.llm.generate(query, stream=True)
)
return result
class StreamlitBot(Bot):
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
print_purple("StreamlitBot init chat:", chat)
super().__init__(username=username, chat=chat, tools=tools, **kwargs)
# For Streamlit, we can override or add attributes
if 'llm_chosen_backend' not in st.session_state:
st.session_state['llm_chosen_backend'] = None
self.chatbot.chosen_backend = st.session_state['llm_chosen_backend']
if not st.session_state['llm_chosen_backend']:
st.session_state['llm_chosen_backend'] = self.chatbot.chosen_backend
def run(self):
# Example Streamlit run loop
self.chat.show_chat_history()
if user_input := st.chat_input("Write your message here...", accept_file=True):
text_input = user_input.text.replace('"""', "---")
if len(user_input.files) > 1:
st.error("Please upload only one file at a time.")
return
attached_file = user_input.files[0] if user_input.files else None
content_attachment = None
if attached_file:
if attached_file.type == "application/pdf":
import fitz
pdf_document = fitz.open(stream=attached_file.read(), filetype="pdf")
pdf_text = ""
for page_num in range(len(pdf_document)):
page = pdf_document.load_page(page_num)
pdf_text += page.get_text()
content_attachment = pdf_text
elif attached_file.type in ["image/png", "image/jpeg"]:
self.chat.message_attachments = "image"
content_attachment = attached_file.read()
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")):
st.image(content_attachment)
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")):
st.write(text_input)
if not self.chat.name:
self.chat.set_name(text_input)
self.chat.last_updated = datetime.now().isoformat()
self.chat.saved = False
self.user_arango.db.collection("chats").insert(
self.chat.to_dict(), overwrite=True, overwrite_mode="update"
)
self.process_user_input(text_input, content_attachment)
def process_user_input(self, user_input, content_attachment=None):
# We override to show messages in Streamlit instead of just storing
self.chat.add_message("user", user_input)
if not content_attachment:
prompt = get_tools_prompt(user_input)
response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
if response.get("tool_calls"):
bot_response = self.answer_tool_call(response, user_input)
else:
bot_response = response.content.strip('"')
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
st.write(bot_response)
else:
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
with st.spinner("Reading the content..."):
if self.chat.message_attachments == "image":
prompt = get_chat_prompt(user_input, role=self.chat.role, image_attachment=True)
bot_resp = self.chatbot.generate(prompt, stream=False, images=[content_attachment], model="vision")
st.write(bot_resp)
bot_response = bot_resp
else:
prompt = get_chat_prompt(user_input, content_attachment=content_attachment, role=self.chat.role)
response = self.chatbot.generate(prompt, stream=True)
bot_response = st.write_stream(response)
if self.chat.chat_history[-1]["role"] != "assistant":
self.chat.add_message("assistant", bot_response)
self.chat.update_in_arango()
def answer_tool_call(self, response, user_input):
bot_responses = []
for tool in response.get("tool_calls", []):
function_name = tool.function.get('name')
arguments = tool.function.arguments
arguments["query"] = user_input
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
if function_name in [
"fetch_other_documents_tool",
"fetch_science_articles_tool",
"fetch_science_articles_and_other_documents_tool",
]:
chunks = getattr(self, function_name)(**arguments)
response_text = self.generate_from_chunks(user_input, chunks)
bot_response = st.write_stream(response_text).strip('"')
if chunks:
sources = "###### Sources:\n"
for title, group in chunks.items():
j = group["chunks"][0]["metadata"].get("journal", "No Journal")
d = group["chunks"][0]["metadata"].get("published_date", "No Date")
sources += f"[{group['article_number']}] **{title}** :gray[{j} ({d})]\n"
st.markdown(sources)
bot_response += f"\n\n{sources}"
bot_responses.append(bot_response)
elif function_name == "fetch_notes_tool":
notes = getattr(self, function_name)()
response_text = self.generate_from_notes(user_input, notes)
bot_responses.append(st.write_stream(response_text).strip('"'))
elif function_name == "conversational_response_tool":
response_text = getattr(self, function_name)(user_input)
bot_responses.append(st.write_stream(response_text).strip('"'))
return "\n\n".join(bot_responses)
def generate_from_notes(self, user_input, notes):
with st.spinner("Reading project notes..."):
return super().generate_from_notes(user_input, notes)
def generate_from_chunks(self, user_input, chunks):
# For reading articles with a spinner
magazines = set()
for group in chunks.values():
j = group["chunks"][0]["metadata"].get("journal", "No Journal")
magazines.add(f"*{j}*")
s = (
f"Reading articles from {', '.join(list(magazines)[:-1])} and {list(magazines)[-1]}..."
if len(magazines) > 1
else "Reading articles..."
)
with st.spinner(s):
return super().generate_from_chunks(user_input, chunks)
def sidebar_content(self):
with st.sidebar:
st.write("---")
st.markdown(f'#### {self.chat.name if self.chat.name else ""}')
st.button("Delete this chat", on_click=self.delete_chat)
def delete_chat(self):
self.user_arango.db.collection("chats").delete_match(
filters={"name": self.chat.name}
)
self.chat = Chat()
def get_notes(self):
# We can show a spinner or messages too
with st.spinner("Fetching notes..."):
return super().get_notes()
class EditorBot(StreamlitBot):
def __init__(self, chat: Chat, username: str, **kwargs):
print_blue("EditorBot init chat:", chat)
super().__init__(chat=chat, username=username, **kwargs)
self.role = "Editor"
self.tools = ToolRegistry.get_tools()
self.chatbot = LLM(
system_message=get_editor_prompt(kwargs.get("project")),
messages=self.chat.chat_history2bot(),
chosen_backend=kwargs.get("chosen_backend"),
)
class ResearchAssistantBot(StreamlitBot):
def __init__(self, chat: Chat, username: str, **kwargs):
super().__init__(chat=chat, username=username, **kwargs)
self.role = "Research Assistant"
self.chatbot = LLM(
system_message=get_assistant_prompt(),
temperature=0.1,
messages=self.chat.chat_history2bot(),
)
self.tools = [
self.fetch_science_articles_tool,
self.fetch_science_articles_and_other_documents_tool,
]
class PodBot(StreamlitBot):
"""Two LLM agents construct a conversation using material from science articles."""
def __init__(
self,
chat: Chat,
subject: str,
username: str,
instructions: str = None,
**kwargs,
):
super().__init__(chat=chat, username=username, **kwargs)
self.subject = subject
self.instructions = instructions
self.guest_name = kwargs.get("name_guest", "Merit")
self.hostbot = HostBot(
Chat(username=self.username, role="Host"),
subject,
username,
instructions=instructions,
**kwargs,
)
self.guestbot = GuestBot(
Chat(username=self.username, role="Guest"),
subject,
username,
name_guest=self.guest_name,
**kwargs,
)
def run(self):
notes = self.get_notes()
notes_string = ""
if self.instructions:
instructions_string = f'''
These are the instructions for the podcast from the producer:
"""
{self.instructions}
"""
'''
else:
instructions_string = ""
for note in notes:
notes_string += f"\n# {note['title']}\n{note['content']}\n---\n"
a = f'''You will conduct a podcast interview with {self.guest_name}, an expert on "{self.subject}".
{instructions_string}
Below are notes on the subject that you can use to ask relevant questions:
"""
{notes_string}
"""
Say hello to the expert and start the interview. Remember to keep the interview to the subject of {self.subject} throughout the conversation.
'''
# Stop button for the podcast
with st.sidebar:
stop = st.button("Stop podcast", on_click=self.stop_podcast)
while st.session_state["make_podcast"]:
# Stop the podcast once the chat reaches 14 messages
self.chat.show_chat_history()
if len(self.chat.chat_history) == 14:
result = self.hostbot.generate(
"The interview has ended. Say thank you to the expert and end the conversation."
)
self.chat.add_message("Host", result)
with st.chat_message(
"assistant", avatar=self.chat.get_avatar(role="assistant")
):
st.write(result.strip('"'))
st.stop()
_q = self.hostbot.toolbot.generate(
query=f"{self.guest_name} has answered: {a}. You have to choose a tool to help the host continue the interview.",
tools=self.hostbot.tools,
temperature=0.6,
stream=False,
)
if "tool_calls" in _q:
q = self.hostbot.answer_tool_call(_q, a)
else:
q = _q
self.chat.add_message("Host", q)
_a = self.guestbot.toolbot.generate(
f'The podcast host has asked: "{q}" Choose a tool to help the expert answer with relevant facts and information.',
tools=self.guestbot.tools,
)
if "tool_calls" in _a:
print_yellow("Tool call response (guest)", _a)
print_yellow(self.guestbot.chat.role)
a = self.guestbot.answer_tool_call(_a, q)
else:
a = _a
self.chat.add_message("Guest", a)
self.update_session_state()
def stop_podcast(self):
st.session_state["make_podcast"] = False
self.update_session_state()
self.chat.show_chat_history()
class HostBot(StreamlitBot):
def __init__(
self, chat: Chat, subject: str, username: str, instructions: str, **kwargs
):
super().__init__(chat=chat, username=username, **kwargs)
self.chat.role = kwargs.get("role", "Host")
self.tools = ToolRegistry.get_tools(
tools=[
self.fetch_notes_tool,
self.conversational_response_tool,
# "fetch_other_documents", #TODO Should this be included?
]
)
self.instructions = instructions
self.llm = LLM(
system_message=f'''
You are the host of a podcast and an expert on {subject}. You will ask one question at a time about the subject, and then wait for the guest to answer.
Don't ask the guest to talk about herself/himself, only about the subject.
Make your questions short and clear; add brief context to a question only if necessary.
These are the instructions for the podcast from the producer:
"""
{self.instructions}
"""
If the expert's answer is complicated, try to make a very brief summary of it for the audience to understand. You can also ask follow-up questions to clarify the answer, or ask for examples.
''',
messages=self.chat.chat_history2bot()
)
self.toolbot = LLM(
temperature=0,
system_message="""
You are assisting a podcast host in asking questions to an expert.
Choose one or many tools to use in order to assist the host in asking relevant questions.
Often "conversational_response_tool" is enough, but sometimes project notes are needed.
Make sure to read the description of the tools carefully!""",
chat=False,
model="small",
)
def generate(self, query):
return self.llm.generate(query)
class GuestBot(StreamlitBot):
def __init__(self, chat: Chat, subject: str, username: str, **kwargs):
super().__init__(chat=chat, username=username, **kwargs)
self.chat.role = kwargs.get("role", "Guest")
self.tools = ToolRegistry.get_tools(
tools=[
self.fetch_notes_tool,
self.fetch_science_articles_tool,
]
)
self.llm = LLM(
system_message=f"""
You are {kwargs.get('name', 'Merit')}, an expert on {subject}.
Today you are a guest in a podcast about {subject}. A host will ask you questions about the subject and you will answer by using scientific facts and information.
When answering, don't say things like "based on the documents" or alike, as neither the host nor the audience can see the documents. Act just as if you were talking to someone in a conversation.
Try to be concise when answering, and remember that the audience of the podcast are not experts on the subject, so don't complicate things too much.
It's very important that you answer in a "spoken" way, as if you were talking to someone in a conversation. That means you should avoid using scientific jargon and complex terms, too many figures or abstract concepts.
Lists are also not recommended; instead use "for the first reason", "secondly", etc.
Use "..." to indicate a pause and "-" to indicate a break in the sentence, as if you were speaking.
""",
messages=self.chat.chat_history2bot()
)
self.toolbot = LLM(
temperature=0,
system_message=f"You are an assistant to an expert on {subject}. Choose one or many tools to use in order to assist the expert in answering questions. Make sure to read the description of the tools carefully.",
chat=False,
model="small",
)
def generate(self, query):
return self.llm.generate(query)

@ -4,10 +4,11 @@ from time import sleep
from datetime import datetime, timedelta
from colorprinter.print_color import *
from streamlit_chatbot import StreamlitChat, EditorBot, ResearchAssistantBot, PodBot, StreamlitBot
from _base_class import StreamlitBaseClass
from _rss import RSSReader
from projects_page import Project
from streamlit_chatbot import StreamlitChat
class BotChatPage(StreamlitBaseClass):
@ -27,17 +28,20 @@ class BotChatPage(StreamlitBaseClass):
setattr(self, k, v)
def run(self):
from streamlit_chatbot import EditorBot, ResearchAssistantBot, PodBot, StreamlitBot
bot = None
self.update_current_page("Bot Chat")
self.remove_old_unsaved_chats()
self.sidebar_actions()
if self.collection_name or self.project:
print_purple("Collection:", self.collection_name, "Project:", self.project_name)
# If no chat exists, create a new Chat instance
self.chat = self.get_chat(role=self.role)
# Create a Bot instance with the Chat object
if self.role == "Research Assistant":
print_blue("Creating Research Assistant Bot")
bot = ResearchAssistantBot(
username=self.username,
chat=self.chat,
@ -83,6 +87,7 @@ class BotChatPage(StreamlitBaseClass):
"role": self.role,
}
else: # If no collection or project is selected, use the conversational response bot
print_yellow("No collection or project selected. Using conversational response bot.")
bot = StreamlitBot(
username=self.username,
chat=self.get_chat(),
@ -92,14 +97,15 @@ class BotChatPage(StreamlitBaseClass):
def get_chat(self, role="Research Assistant"):
print_blue('CHAT TYPE:', role)
if 'chat_key' not in st.session_state:
chat=StreamlitChat(username=self.username, role=role)
chat = StreamlitChat(username=self.username, role=role)
st.session_state['chat_key'] = chat._key
print_blue("Creating new chat:", st.session_state['chat_key'])
else:
print_blue("Old chat:", st.session_state['chat_key'])
chat = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
chat = StreamlitChat.from_dict(chat)
chat_data = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
chat = StreamlitChat.from_dict(chat_data)
return chat
def sidebar_actions(self):
@ -107,11 +113,11 @@ class BotChatPage(StreamlitBaseClass):
self.collection = self.choose_collection(
"Article collection to use for chat:"
)
self.project_name = self.choose_project("Project to use for chat:")
self.project = self.choose_project("Project to use for chat:")
if self.collection or self.project:
st.write("---")
if self.project_name:
if self.project:
self.role = st.selectbox(
"Choose Bot Role",
options=["Research Assistant", "Editor", "Podcast"],
@ -142,13 +148,23 @@ class BotChatPage(StreamlitBaseClass):
if selected_chat:
st.session_state["chat_key"] = chats[selected_chat]
self.chat = self.get_chat()
if not self.role:
self.role == "Research Assistant"
def remove_old_unsaved_chats(self):
two_weeks_ago = datetime.now() - timedelta(weeks=2)
q = f'FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < "{two_weeks_ago.isoformat()}" RETURN doc'
print_blue(q)
old_chats = self.user_arango.db.aql.execute(
f'FOR doc IN chats RETURN doc'
)
print('test', old_chats)
old_chats = self.user_arango.db.aql.execute(
f'FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < "{two_weeks_ago.isoformat()}" RETURN doc'
)
for chat in old_chats:
print_red(chat["_id"])
self.user_arango.db.collection("chats").delete(chat["_key"])
@ -159,6 +175,7 @@ class SettingsPage(StreamlitBaseClass):
def run(self):
self.update_current_page("Settings")
self.set_profile_picture()
self.use_reasoning_model()
def set_profile_picture(self):
st.markdown("Profile picture")
@ -177,6 +194,25 @@ class SettingsPage(StreamlitBaseClass):
st.success("Profile picture uploaded")
sleep(1)
def use_reasoning_model(self):
"""
Displays a checkbox in the Streamlit interface to enable or disable the reasoning model for chat responses.
Retrieves the current settings, initializing "use_reasoning_model" to False if it is missing, then renders the checkbox and saves the updated value back to the settings.
Returns:
None
"""
settings = self.get_settings()
if "use_reasoning_model" not in settings:
settings["use_reasoning_model"] = False
st.markdown("Use Reasoning Model")
use_reasoning_model = st.checkbox("Use Reasoning Model", value=settings["use_reasoning_model"], help="Use the reasoning model to generate responses in chats. This may take longer to process.")
self.update_settings("use_reasoning_model", use_reasoning_model)
class RSSFeedsPage(StreamlitBaseClass):
def __init__(self, username: str):

@ -68,7 +68,9 @@ class LLM:
self,
system_message: str = "You are an assistant.",
temperature: float = 0.01,
model: Optional[Literal["small", "standard", "vision"]] = "standard",
model: Optional[
Literal["small", "standard", "vision", "reasoning", "tools"]
] = "standard",
max_length_answer: int = 4096,
messages: list[dict] = None,
chat: bool = True,
@ -80,7 +82,7 @@ class LLM:
Args:
system_message (str): The initial system message for the assistant. Defaults to "You are an assistant.".
temperature (float): The temperature setting for the model, affecting randomness. Defaults to 0.01.
model (Optional[Literal["small", "standard", "vision"]]): The model type to use. Defaults to "standard".
model (Optional[Literal["small", "standard", "vision", "reasoning"]]): The model type to use. Defaults to "standard".
max_length_answer (int): The maximum length of the generated answer. Defaults to 4096.
messages (list[dict], optional): A list of initial messages. Defaults to None.
chat (bool): Whether the assistant is in chat mode. Defaults to True.
@ -91,6 +93,9 @@ class LLM:
"""
self.model = self.get_model(model)
self.call_model = (
self.model
) # Set per call to record which model was actually used
self.system_message = system_message
self.options = {"temperature": temperature}
self.messages = messages or [{"role": "system", "content": self.system_message}]
@ -101,23 +106,27 @@ class LLM:
chosen_backend = self.get_least_conn_server()
self.chosen_backend = chosen_backend
# Initialize the client with the host and default headers
credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}"
encoded_credentials = base64.b64encode(credentials.encode()).decode()
headers = {
"Authorization": f"Basic {encoded_credentials}",
"Authorization": f"Basic {self.get_credentials()}",
"X-Chosen-Backend": self.chosen_backend,
}
host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/")
self.client = Client(host=host_url, headers=headers)
self.async_client = AsyncClient()
self.host_url = os.getenv("LLM_API_URL").removesuffix("/api/chat/")
self.client: Client = Client(host=self.host_url, headers=headers)
self.async_client: AsyncClient = AsyncClient()
def get_credentials(self):
# Initialize the client with the host and default headers
credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}"
return base64.b64encode(credentials.encode()).decode()
def get_model(self, model_alias):
models = {
"standard": "LLM_MODEL",
"small": "LLM_MODEL_SMALL",
"vision": "LLM_MODEL_VISION",
"standard_64k": "LLM_MODEL_64K",
"reasoning": "LLM_MODEL_REASONING",
"tools": "LLM_MODEL_TOOLS",
}
return os.getenv(models.get(model_alias, "LLM_MODEL"))
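# e.g. get_model("reasoning") returns the value of LLM_MODEL_REASONING;
# unknown aliases fall back to the LLM_MODEL default.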
@ -151,39 +160,42 @@ class LLM:
stream: bool = False,
tools: list = None,
images: list = None,
model: Optional[Literal["small", "standard", "vision"]] = None,
model: Optional[
Literal["small", "standard", "vision", "reasoning", "tools"]
] = None,
temperature: float = None,
messages: list[dict] = None,
):
"""
Generate a response based on the provided query and options.
Args:
query (str, optional): The query string to generate a response for.
user_input (str, optional): Additional user input to update the last message.
context (str, optional): Context information to be used in the response.
stream (bool, optional): Whether to stream the response. Defaults to False.
tools (list, optional): List of tools to be used in the response generation.
images (list, optional): List of images to be included in the response.
model (Optional[Literal["small", "standard", "vision"]], optional): The model to be used for response generation.
temperature (float, optional): The temperature setting for the model.
messages (list[dict], optional): A list of messages formatted as dictionaries (e.g. {'role': 'user', 'content': 'message'}).
Generate a response based on the provided query and context.
Parameters:
query (str): The query string from the user.
user_input (str): Additional user input to be appended to the last message.
context (str): Contextual information to be used in generating the response.
stream (bool): Whether to stream the response.
tools (list): List of tools to be used in generating the response.
images (list): List of images to be included in the response.
model (Optional[Literal["small", "standard", "vision", "tools"]]): The model type to be used.
temperature (float): The temperature setting for the model.
messages (list[dict]): List of previous messages in the conversation.
Returns:
str: The generated response or an error message if an exception occurs.
Raises:
ResponseError: If an error occurs during the response generation.
str: The generated response or an error message if an exception occurs.
"""
print_yellow("GENERATE")
# Prepare the model and temperature
model = self.get_model(model) if model else self.model
if model == self.get_model('tools'):
stream = False
temperature = temperature if temperature else self.options["temperature"]
if messages:
messages = [{'role': i['role'], 'content': re.sub(r"\s*\n\s*", "\n", i['content'])} for i in messages]
messages = [
{"role": i["role"], "content": re.sub(r"\s*\n\s*", "\n", i["content"])}
for i in messages
]
message = messages.pop(-1)
query = message['content']
query = message["content"]
self.messages = messages
else:
# Normalize whitespace and add the query to the messages
@ -198,30 +210,37 @@ class LLM:
self.messages.append(message)
# Prepare headers
headers = {}
if self.chosen_backend:
headers = {"Authorization": f"Basic {self.get_credentials()}"}
if self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]: #TODO Maybe reasoning shouldn't be here.
headers["X-Chosen-Backend"] = self.chosen_backend
if model == self.get_model("small"):
headers["X-Model-Type"] = "small"
if model == self.get_model("tools"):
headers["X-Model-Type"] = "tools"
# Prepare options
options = Options(**self.options)
options.temperature = temperature
if tools:
print_yellow("Tools:", tools)
# Adjust the options for long messages
if self.chat or len(self.messages) > 15000:
if (self.chat or len(self.messages) > 15000) and model != self.get_model("tools"):
num_tokens = self.count_tokens() + self.max_length_answer // 2
if num_tokens > 8000:
model = self.get_model("standard_64k")
print_purple("Switching to large model")
headers["X-Model-Type"] = "large"
# Call the client.chat method
try:
print('###########')
self.call_model = model
print()
print('Headers:', headers)
print_yellow('Model:', model)
print()
headers['X-Chosen-Backend'] = 'backend_tools_server'
self.client: Client = Client(host=self.host_url, headers=headers)
response = self.client.chat(
model=model,
messages=self.messages,
@ -230,12 +249,14 @@ class LLM:
options=options,
keep_alive=3600 * 24 * 7,
)
except ResponseError as e:
print_red("Error!")
print(e)
return "An error occurred."
# print_rainbow(response.__dict__)
# If user_input is provided, update the last message
if user_input:
if context:
if len(context) > 2000:
@ -298,28 +319,51 @@ class LLM:
return "Summary generation failed."
def read_stream(self, response):
# Implement streaming response handling if needed
message = ""
"""
Yields tuples of (chunk_type, text). The first tuple is ('thinking', ...)
if in_thinking is True and stops at </think>. After that, yields ('normal', ...)
for the rest of the text.
"""
thinking_buffer = ""
in_thinking = self.call_model == self.get_model("reasoning")
first_chunk = True
prev_content = None
for chunk in response:
if chunk:
content = chunk.message.content
if first_chunk and content.startswith('"'):
content = content[1:]
first_chunk = False
if chunk.done:
if prev_content and prev_content.endswith('"'):
prev_content = prev_content[:-1]
if prev_content:
yield prev_content
break
else:
if prev_content:
yield prev_content
prev_content = content
self.messages.append({"role": "assistant", "content": message.strip('"')})
if not chunk:
continue
content = chunk.message.content
# Remove leading quote if it's the first chunk
if first_chunk and content.startswith('"'):
content = content[1:]
first_chunk = False
if in_thinking:
thinking_buffer += content
if "</think>" in thinking_buffer:
end_idx = thinking_buffer.index("</think>") + len("</think>")
yield ("thinking", thinking_buffer[:end_idx])
remaining = thinking_buffer[end_idx:].strip('"')
if chunk.done and remaining:
yield ("normal", remaining)
break
else:
prev_content = remaining
in_thinking = False
else:
if prev_content:
yield ("normal", prev_content)
prev_content = content
if chunk.done:
if prev_content and prev_content.endswith('"'):
prev_content = prev_content[:-1]
if prev_content:
yield ("normal", prev_content)
break
self.messages.append({"role": "assistant", "content": ""})
async def async_generate(
self,
@ -340,9 +384,9 @@ class LLM:
user_input (str, optional): Additional user input to be included in the response.
context (str, optional): Context information to be used in generating the response.
stream (bool, optional): Whether to stream the response. Defaults to False.
tools (list, optional): List of tools to be used in generating the response.
tools (list, optional): List of tools to be used in generating the response. Will set the model to 'tools'.
images (list, optional): List of images to be included in the response.
model (Optional[Literal["small", "standard", "vision"]], optional): The model to be used for generating the response.
model (Optional[Literal["small", "standard", "vision", "tools"]], optional): The model to be used for generating the response.
temperature (float, optional): The temperature setting for the model.
Returns:
@ -359,47 +403,53 @@ class LLM:
- If user_input is provided, it updates the last message.
- It updates the chosen backend based on the response headers.
- It handles streaming responses and processes the response accordingly.
- It's not necessary to set the model to 'tools' if you provide tools as an argument.
"""
print_yellow("ASYNC GENERATE")
# Normalize whitespace and add the query to the messages
query = re.sub(r"\s*\n\s*", "\n", query)
message = {"role": "user", "content": query}
self.messages.append(message)
# Prepare the model and temperature
model = self.get_model(model) if model else self.model
temperature = temperature if temperature else self.options["temperature"]
# Normalize whitespace and add the query to the messages
query = re.sub(r"\s*\n\s*", "\n", query)
message = {"role": "user", "content": query}
# Prepare options
options = Options(**self.options)
options.temperature = temperature
# Handle images if any
# Prepare headers
headers = {}
# Set model depending on the input
if images:
message = self.prepare_images(images, message)
model = self.get_model("vision")
self.messages.append(message)
# Prepare headers
headers = {}
if self.chosen_backend:
elif tools:
model = self.get_model("tools")
headers["X-Model-Type"] = "tools"
tools = [Tool(**tool) if isinstance(tool, dict) else tool for tool in tools]
elif self.chosen_backend and model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]:
headers["X-Chosen-Backend"] = self.chosen_backend
if model == self.get_model("small"):
elif model == self.get_model("small"):
headers["X-Model-Type"] = "small"
# Prepare options
options = Options(**self.options)
options.temperature = temperature
# Prepare tools if any
if tools:
tools = [Tool(**tool) if isinstance(tool, dict) else tool for tool in tools]
# Adjust options for long messages
if self.chat or len(self.messages) > 15000:
num_tokens = self.count_tokens() + self.max_length_answer // 2
if num_tokens > 8000:
if num_tokens > 8000 and model not in [
self.get_model("vision"),
self.get_model("tools"),
]:
model = self.get_model("standard_64k")
headers["X-Model-Type"] = "large"
# Call the async client's chat method
print()
print_rainbow(self.async_client.__dict__)
print(model, headers, )
print()
try:
response = await self.async_client.chat(
model=model,
@ -430,9 +480,10 @@ class LLM:
self.messages[-1] = {"role": "user", "content": user_input}
# Update chosen_backend
self.chosen_backend = self.async_client.last_response.headers.get(
"X-Chosen-Backend"
)
if model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]:
self.chosen_backend = self.async_client.last_response.headers.get(
"X-Chosen-Backend"
)
# Handle streaming response
if stream:

@ -493,7 +493,7 @@ class Processor:
if result == "not_found":
return None
else:
parts = result.split(";", 2)
parts = result.content.split(";", 2)
if len(parts) != 3:
return None
published_date, title, journal = parts

@ -1,6 +1,11 @@
from _llm import LLM
import os, re
import os
import re
import random
from typing import List, Dict
from atproto import models
from types import SimpleNamespace
import requests
from atproto import (
CAR,
AtUri,
@ -9,10 +14,14 @@ from atproto import (
firehose_models,
models,
parse_subscribe_repos_message,
models,
)
from colorprinter.print_color import *
from datetime import datetime
from semantic_text_splitter import MarkdownSplitter
from env_manager import set_env
set_env()
@ -29,18 +38,30 @@ class Bot:
# Create a client instance to interact with Bluesky
self.username = os.getenv("BLUESKY_USERNAME")
self.max_length_answer = 280
system_message = '''
You are a research assistant bot chatting with a user on Bluesky, a social media platform similar to Twitter.
Your speciality is electric cars, and you will use facts from the articles to answer the questions.
Use ONLY the information in the articles to answer the questions. Do not add any additional information or speculation.
IF you don't know the answer, you can say "I don't know" or "I'm not sure". You can also ask the user to specify the question.
Your answers should be concise and not exceed 250 characters to fit the character limit on Bluesky.
Answer in English.
Your answers should be concise and NOT EXCEED 1000 CHARACTERS to fit the character limit on Bluesky.
Always answer in English.
'''
self.llm: LLM = LLM(system_message=system_message, max_length_answer=200)
self.llm: LLM = LLM(system_message=system_message)
post_maker_system_message = f'''
You will get a text and you have to format it for Bluesky, a platform similar to Twitter.
You should format the text in a thread of posts with a maximum of {self.max_length_answer} characters per post.
It's VERY important to keep the text as close as possible to the original text.
Format the thread without any additional formatting, just plain text.
Add "---" to separate the posts. Don't add a counter of any type, that will be added automatically.
'''
self.post_maker = LLM(system_message=post_maker_system_message, model="small")
self.client = Client()
self.client.login(self.username, os.getenv("BLUESKY_PASSWORD"))
self.chat = None
self.pds_url = 'https://bsky.social'
print("🐟 Bot is listening")
@ -82,6 +103,7 @@ class Bot:
return entries
def process_operation(
self,
op: models.ComAtprotoSyncSubscribeRepos.RepoOp,
@ -117,8 +139,53 @@ class Bot:
self.traverse_thread(posts_in_thread.thread)
self.chat.thread_posts.sort(key=lambda x: x["timestamp"])
self.make_llm_messages()
print_purple(self.chat.messages)
answer = self.llm.generate(messages=self.chat.messages)
self.client.send_post(f'@{poster_username} {answer.content} ')
print_green(answer.content)
print_purple('Length of answer:', len(answer.content))
answer_as_thread = False
if len(answer.content) > self.max_length_answer:
# Save the answer as a unique HTML file in bluesky_bot_answers
filename_answer = f'bluesky_bot_answers/{record["cid"]}_{random.randint(a=10000, b=99000)}.html'
with open(filename_answer, 'w') as f:
f.write(answer.content)
formatted_answer = self.post_maker.generate(query=f"Format the text below as a thread of posts with a maximum of {self.max_length_answer} characters per post.\n\n{answer.content}")
# # Optionally can also have the splitter not trim whitespace for you
# splitter = MarkdownSplitter(self.max_length_answer)
# chunks = splitter.chunks(answer.content)
chunks = formatted_answer.content.split('---')
print_yellow('Formatted answer')
print_rainbow(chunks)
answer_as_thread = True
record_obj = SimpleNamespace(cid=record["cid"], uri=record["uri"])
parent = models.create_strong_ref(record_obj)
root_obj = SimpleNamespace(cid=record['reply']['root']["cid"], uri=record['reply']['root']["uri"])
root_post = models.create_strong_ref(root_obj)
reply_ref = models.AppBskyFeedPost.ReplyRef(parent=parent, root=root_post)
mention_handle = f"@{poster_username}"
if not answer_as_thread:
text = f"{mention_handle} {answer.content}"
facets = self.parse_facets(text)
print('Handle:', mention_handle)
print(f"Facets")
print(facets)
sent_answer = self.client.send_post(text=text, facets=facets, reply_to=reply_ref, langs=["en-US"])
else:
for n, chunk in enumerate(chunks, start=1):
chunk = chunk.strip()
text = f"{chunk}\n({n}/{len(chunks)})"
facets = self.parse_facets(text)
sent_answer = self.client.send_post(text=text, facets=facets, reply_to=reply_ref, langs=["en-US"])
parent = models.create_strong_ref(sent_answer)
reply_ref = models.AppBskyFeedPost.ReplyRef(parent=parent, root=root_post)
text = f'The answers above are a blueskyified version of the original answer. The original answer can be found at https://sci.assistant.fish/answers/{filename_answer}'
reply_ref = models.AppBskyFeedPost.ReplyRef(parent=parent, root=root_post)
sent_answer = self.client.send_post(text=text, reply_to=reply_ref, langs=["en-US"])
if op.action == "delete":
@ -176,7 +243,6 @@ class Bot:
Returns:
None
"""
print_rainbow(self.chat.thread_posts)
start = False
for i in self.chat.thread_posts:
# Make the messages start with a message mentioning the bot
@ -190,7 +256,7 @@ class Bot:
and len(self.chat.messages) > 0
and self.chat.messages[-1] != self.chat.bot_username
):
i['text'] = i['text'].replace(f"@{self.chat.poster_username}", "").strip()
i['text'] = i['text'].replace(f"@{self.chat.bot_username}", "").strip()
self.chat.messages.append({"role": "assistant", "content": i["text"]})
elif i["user"] == self.chat.poster_username:
i['text'] = i['text'].replace(f"@{self.chat.bot_username}", "").strip()
@ -198,6 +264,8 @@ class Bot:
self.chat.messages[-1]['content'] += f"\n\n{i['text']}"
else:
self.chat.messages.append({"role": "user", "content": i["text"]})
def on_message_handler(self, message: firehose_models.MessageFrame) -> None:
# Callback function that handles incoming messages from the firehose subscription
@ -223,9 +291,44 @@ class Bot:
self.process_operation(op, car, commit)
# Parse facets from text and resolve the handles to DIDs
def parse_facets(self, text: str) -> List[Dict]:
facets = []
for m in self.parse_mentions(text):
resp = requests.get(
self.pds_url + "/xrpc/com.atproto.identity.resolveHandle",
params={"handle": m["handle"]},
)
# If the handle can't be resolved, just skip it!
# It will be rendered as text in the post instead of a link
if resp.status_code == 400:
continue
did = resp.json()["did"]
facets.append({
"index": {
"byteStart": m["start"],
"byteEnd": m["end"],
},
"features": [{"$type": "app.bsky.richtext.facet#mention", "did": did}],
})
return facets
def parse_mentions(self, text: str) -> List[Dict]:
spans = []
# Simplified regex to match handles
mention_regex = rb"(@[a-zA-Z0-9._-]+)"
text_bytes = text.encode("UTF-8")
for m in re.finditer(mention_regex, text_bytes):
spans.append({
"start": m.start(1),
"end": m.end(1),
"handle": m.group(1)[1:].decode("UTF-8")
})
return spans
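# Illustrative round trip (handle and text are examples only):
# text = "@alice.bsky.social thanks for the question!"
# facets = bot.parse_facets(text)  # resolves the handle to a DID via the PDS
# bot.client.send_post(text=text, facets=facets, langs=["en-US"])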
def main() -> None:
bot = Bot()
bot.answer_message("Hello, world!")
Bot()
if __name__ == "__main__":

@ -311,6 +311,77 @@ class ProjectsPage(StreamlitBaseClass):
class Project(StreamlitBaseClass):
"""
A class to represent a project in the Streamlit application.
Attributes:
-----------
username : str
The username of the project owner.
project_name : str
The name of the project.
user_arango : ArangoDB
The ArangoDB instance for the user.
name : str
The name of the project.
description : str
The description of the project.
collections : list
A list of collections associated with the project.
notes : list
A list of notes associated with the project.
note_keys_hash : int
A hash value representing the keys of the notes.
settings : dict
A dictionary of settings for the project.
notes_summary : str
A summary of the notes in the project.
Methods:
--------
load_project():
Loads the project data from the ArangoDB.
update_project():
Updates the project data in the ArangoDB.
add_collections(collections):
Adds multiple collections to the project.
add_collection(collection_name):
Adds a single collection to the project.
add_note(note):
Adds a note to the project.
add_interview(interview, intervievees, interviewer, date_of_interveiw):
Adds an interview to the project.
add_interview_transcript(transcript, filename, intervievees, interviewer, date_of_interveiw):
Adds an interview transcript to the project.
transcribe(uploaded_file):
Transcribes an uploaded audio file.
format_transcription(transcription):
Formats the transcription text.
delete_note(note_id):
Deletes a note from the project.
delete_interview(interview_id):
Deletes an interview from the project.
update_notes_hash():
Updates the hash value of the notes.
make_project_notes_hash():
Generates a hash value for the project notes.
create_notes_summary():
Creates a summary of the project notes.
analyze_image(image_base64, text):
Analyzes an image and generates a description.
process_uploaded_notes(files):
Processes uploaded note files.
file2img(file):
Converts an uploaded file to an image.
convert_image_to_pdf(img):
Converts an image to a PDF file.
get_wikipedia_data(page_url):
Fetches data from a Wikipedia page.
process_wikipedia_data(wiki_data, wiki_url):
Processes Wikipedia data and adds it to the project.
process_dois(article_collection_name, text, dois):
Processes DOIs and adds the corresponding articles to the project.
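Example (illustrative usage; assumes the project document already exists):
--------
project = Project("alice", "my-project", ArangoDB(user="alice"))
project.add_collection("sci_articles")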
"""
def __init__(self, username: str, project_name: str, user_arango: ArangoDB):
super().__init__(username=username)
self.name = project_name
@ -332,6 +403,7 @@ class Project(StreamlitBaseClass):
bind_vars={"name": self.name},
)
project = next(project_cursor, None)
if not project:
raise ValueError(f"Project '{self.name}' not found.")
self._key = project["_key"]
@ -359,10 +431,12 @@ class Project(StreamlitBaseClass):
def add_collections(self, collections):
self.collections.extend(collections)
self.collections = list(set(self.collections))
self.update_project()
def add_collection(self, collection_name):
self.collections.append(collection_name)
self.collections = list(set(self.collections))
self.update_project()
def add_note(self, note: dict):
@ -578,7 +652,7 @@ class Project(StreamlitBaseClass):
notes_string = "\n---\n".join(notes)
llm = LLM(model="small")
query = get_note_summary_prompt(self, notes_string)
summary = llm.generate(query)
summary = llm.generate(query).content
print_purple("New summary of notes:", summary)
self.notes_summary = summary
self.update_session_state()

@ -170,11 +170,17 @@ def get_image_system_prompt(project):
return re.sub(r"\s*\n\s*", "\n", system_message)
def get_tools_prompt(user_input):
return f'''User message: "{user_input}"
Choose one or many tools in order to answer the message. It's important that you think of what information (if any) is needed to make a good answer.
prompt = f'''User message: "{user_input}"
You have to choose one or many tools in order to answer the message. It's important that you think of what information (if any) is needed to make a good answer.
Make sure to read the description of the tools carefully before choosing!
You can ONLY choose a tool you are provided with, don't make up a tool!
You HAVE TO CHOOSE A TOOL, even if you think you can answer without it. Don't answer the question without choosing a tool.
'''
# prompt = f'''User message: "{user_input}"
# What tool(s) would you use to answer the question? Choose one or more tools that you think would be most helpful.
# Answer with a tool, not a response to the user's message.
# '''
return re.sub(r"\s*\n\s*", "\n", prompt)
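# The re.sub above flattens the indentation picked up by the triple-quoted
# string, so the prompt reaches the model as clean, unindented lines.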
def get_summary_prompt(text, is_sci):

@ -4,20 +4,32 @@ from _base_class import StreamlitBaseClass, BaseClass
from _llm import LLM
from prompts import *
from colorprinter.print_color import *
from llm_tools import ToolRegistry
from ollama._types import Message as OllamaMessage
from ollama._types import ChatResponse as OllamaChatResponse
from projects_page import Project
class Chat(StreamlitBaseClass):
def __init__(self, username=None, **kwargs):
def __init__(self, username=None, role=None, key=None, **kwargs):
super().__init__(username=username, **kwargs)
self.name = kwargs.get("name", None)
self.chat_history = kwargs.get("chat_history", [])
def add_message(self, role, content):
if isinstance(content, str):
content = content.strip().strip('"')
elif isinstance(content, dict):
content = content["content"].strip().strip('"')
else:
try:
# e.g. an Ollama message object exposing .get()
content = content.get("content", "").strip().strip('"')
except Exception:
pass
self.chat_history.append(
{
"role": role,
"content": content.strip().strip('"'),
"content": content,
"role_type": self.role,
}
)
@ -90,6 +102,8 @@ class StreamlitChat(Chat):
self.collection = kwargs.get("collection", None)
self.message_attachments = None
self.last_updated = datetime.now().isoformat()
self._key = _key
self.role = role
if self._key:
chat = self.user_arango.db.collection("chats").get(self._key)
@ -108,7 +122,6 @@ class StreamlitChat(Chat):
}
)["_key"]
def show_chat_history(self):
for message in self.chat_history:
if message["role"] not in ["user", "assistant"]:
@ -139,22 +152,30 @@ class StreamlitChat(Chat):
else:
avatar = None
return avatar
class Bot(BaseClass):
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
super().__init__(username=username, **kwargs)
# Use the passed in chat or create a new Chat
self.chat = chat if chat else Chat(username=username, role="Research Assistant")
# Store or set up project/collection if available
self.project = kwargs.get("project", None)
self.project: Project = kwargs.get("project", None)
self.collection = kwargs.get("collection", None)
if self.collection and not isinstance(self.collection, list):
self.collection = [self.collection]
elif self.project:
self.collection = self.project.collections
# Load articles in the collections
self.arango_ids = []
# Bots to be initiated later
self.chatbot = None
self.helperbot = None
self.toolbot = None
self.initiate_bots()
if self.collection:
for c in self.collection:
for _id in self.user_arango.db.aql.execute(
@ -168,6 +189,32 @@ class Bot(BaseClass):
):
self.arango_ids.append(_id)
# Map tool names to functions
tool_mapping = {
"fetch_other_documents_tool": self.fetch_other_documents_tool,
"fetch_science_articles_tool": self.fetch_science_articles_tool,
"fetch_science_articles_and_other_documents_tool": self.fetch_science_articles_and_other_documents_tool,
"fetch_notes_tool": self.fetch_notes_tool,
"conversational_response_tool": self.conversational_response_tool,
}
# Convert tool names to function references
if tools:
self.tools = [tool_mapping[tool] if isinstance(tool, str) else tool for tool in tools]
else:
self.tools = None
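        # With no explicit tools, self.tools stays None; subclasses (EditorBot, GuestBot, etc.) assign their own tool lists.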
# Store other kwargs
for arg in kwargs:
setattr(self, arg, kwargs[arg])
# # Initiate the bots
# try:
# self.initiate_bots()
# except Exception as e:
# print_red(f"Error initiating bots: {e}")
def initiate_bots(self):
# A standard LLM for normal chat
self.chatbot = LLM(messages=self.chat.chat_history2bot())
# A helper bot for generating queries or short prompts
@ -182,24 +229,16 @@ class Bot(BaseClass):
self.toolbot = LLM(
temperature=0,
system_message="""
You are an assistant bot helping an answering bot to answer a user's messages.
Your task is to choose one or multiple tools that will help the answering bot to provide the user with the best possible answer.
You should NEVER directly answer the user. You MUST choose a tool.
You are a helpful assistant with some tools.
Your task is to choose one or multiple tools to answer a user's query.
ALWAYS choose one or more of the provided tools.
DON'T come up with your own tools, only use the ones provided.
""",
# system_message='Use one of the provided tools to help the answering bot to answer the user. Do not answer directly. Use the "tool_calls" field in your answer.',
chat=False,
model="small",
model="tools"
)
# Load or register the passed-in tools
if tools:
self.tools = ToolRegistry.get_tools(tools=tools)
else:
self.tools = ToolRegistry.get_tools()
# Store other kwargs
for arg in kwargs:
setattr(self, arg, kwargs[arg])
def get_chunks(
self,
user_input,
@ -246,9 +285,7 @@ class Bot(BaseClass):
if len(sources) >= n_sources:
break
if len(closest_chunks) < n_results:
remaining_chunks = [
c for c in combined_chunks if c not in closest_chunks
]
remaining_chunks = [c for c in combined_chunks if c not in closest_chunks]
closest_chunks.extend(remaining_chunks[: n_results - len(closest_chunks)])
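        # Top up with leftover chunks so the caller still gets n_results chunks when the per-source cap falls short.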
# Now fetch real metadata from Arango
@ -289,7 +326,7 @@ class Bot(BaseClass):
return ""
for tool in response.get("tool_calls"):
function_name = tool.function.get('name')
function_name = tool.function.get("name")
arguments = tool.function.arguments
arguments["query"] = user_input
@ -301,49 +338,59 @@ class Bot(BaseClass):
]:
chunks = getattr(self, function_name)(**arguments)
bot_responses.append(
self.generate_from_chunks(user_input, chunks).strip('"')
self.generate_from_chunks(user_input, chunks)
)
elif function_name == "fetch_notes_tool":
notes = getattr(self, function_name)()
bot_responses.append(
self.generate_from_notes(user_input, notes).strip('"')
self.generate_from_notes(user_input, notes)
)
elif function_name == "conversational_response_tool":
response: OllamaMessage = getattr(self, function_name)(user_input)
print_green('Conversation response:', response)
bot_responses.append(
getattr(self, function_name)(user_input).strip('"')
response.content.strip('"')
)
return "\n\n".join(bot_responses)
def process_user_input(self, user_input, content_attachment=None):
# Add user message
self.chat.add_message("user", user_input)
if not content_attachment:
prompt = get_tools_prompt(user_input)
response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
if response.get("tool_calls"):
bot_response = self.answer_tool_call(response, user_input)
else:
# Just respond directly
bot_response = response.content.strip('"')
else:
# If there's an attachment, do something minimal
bot_response = "Content attachment received (Base Bot)."
# Add assistant message
if self.chat.chat_history[-1]["role"] != "assistant":
self.chat.add_message("assistant", bot_response)
# Update in Arango
self.chat.update_in_arango()
return bot_response
        print_rainbow(*bot_responses)
        return "\n\n".join(bot_responses)
# def process_user_input(self, user_input, content_attachment=None):
# # Add user message
# self.chat.add_message("user", user_input)
# print('content_attachment', content_attachment)
# if not content_attachment:
# prompt = get_tools_prompt(user_input)
    # print('TOOLS PROMPT:', prompt)
# print_red('\nToolbot system message:', self.toolbot.system_message)
# response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
# print_rainbow(response)
# if response.get("tool_calls"):
# bot_response = self.answer_tool_call(response, user_input)
# else:
# # Just respond directly
# bot_response = response.content.strip('"')
# else:
# # If there's an attachment, do something minimal
# bot_response = "Content attachment received (Base Bot)."
# # Add assistant message
# if self.chat.chat_history[-1]["role"] != "assistant":
# self.chat.add_message("assistant", bot_response)
# # Update in Arango
# self.chat.update_in_arango()
# return bot_response
def generate_from_notes(self, user_input, notes):
# No Streamlit calls
notes_string = ""
for note in notes:
notes_string += f"\n# {note.get('title','No title')}\n{note.get('content','')}\n---\n"
prompt = get_chat_prompt(user_input, content_string=notes_string, role=self.chat.role)
notes_string += (
f"\n# {note.get('title','No title')}\n{note.get('content','')}\n---\n"
)
prompt = get_chat_prompt(
user_input, content_string=notes_string, role=self.chat.role
)
return self.chatbot.generate(prompt, stream=True)
def generate_from_chunks(self, user_input, chunks):
@ -355,10 +402,10 @@ class Bot(BaseClass):
notes = group["chunks"][0]["metadata"]["user_notes"]
user_notes_string = f'\n\nUser notes:\n"""\n{notes}\n"""\n\n'
docs = "\n(...)\n".join([c["document"] for c in group["chunks"]])
chunks_string += (
f"\n# {title}\n## Article #{group['article_number']}\n{user_notes_string}{docs}\n---\n"
)
prompt = get_chat_prompt(user_input, content_string=chunks_string, role=self.chat.role)
chunks_string += f"\n# {title}\n## Article #{group['article_number']}\n{user_notes_string}{docs}\n---\n"
prompt = get_chat_prompt(
user_input, content_string=chunks_string, role=self.chat.role
)
return self.chatbot.generate(prompt, stream=True)
def run(self):
@ -372,7 +419,6 @@ class Bot(BaseClass):
)
return list(notes)
@ToolRegistry.register
def fetch_science_articles_tool(self, query: str, n_documents: int):
"""
"Fetches information from scientific articles. Use this tool when the user is looking for information from scientific articles."
@ -384,7 +430,7 @@ class Bot(BaseClass):
Returns:
list: A list of chunks containing information from the fetched scientific articles.
"""
print_purple('Query:', query)
print_purple("Query:", query)
n_documents = int(n_documents)
if n_documents < 3:
@ -395,7 +441,6 @@ class Bot(BaseClass):
query, collections=["sci_articles"], n_results=n_documents
)
@ToolRegistry.register
def fetch_other_documents_tool(self, query: str, n_documents: int):
"""
Fetches information from other documents based on the user's query.
@ -421,7 +466,6 @@ class Bot(BaseClass):
n_results=n_documents,
)
@ToolRegistry.register
def fetch_science_articles_and_other_documents_tool(
self, query: str, n_documents: int
):
@ -449,7 +493,6 @@ class Bot(BaseClass):
n_results=n_documents,
)
@ToolRegistry.register
def fetch_notes_tool(bot):
"""
Fetches information from the project notes when you, as an editor, need context from the project notes to understand other information. ONLY use this together with other tools! No arguments needed.
@ -460,14 +503,14 @@ class Bot(BaseClass):
assert isinstance(bot, Bot), "The first argument must be a Bot object."
return bot.get_notes()
@ToolRegistry.register
def conversational_response_tool(self, query: str):
"""
Generate a conversational response to a user's query.
This method is designed to provide a short and conversational response
without fetching additional data. It should be used only when it is clear
without fetching additional data. It should be used ONLY when it is clear
that the user is engaging in small talk (like saying 'hi') and not seeking detailed information.
If the user is asking for information or a qualified answer, don't use this tool!
Args:
query (str): The user's message to which the bot should respond.
@ -482,24 +525,27 @@ class Bot(BaseClass):
Don't answer with anything you're not sure of!
"""
result = (
self.chatbot.generate(query, stream=True)
if self.chatbot
else self.llm.generate(query, stream=True)
)
return result
return self.chatbot.generate(query, stream=False)
class StreamlitBot(Bot):
def __init__(self, username: str, chat: StreamlitChat = None, tools: list = None, **kwargs):
super().__init__(username, chat, tools, **kwargs)
def __init__(
self, username: str, chat: StreamlitChat = None, tools: list = None, **kwargs
):
super().__init__(username=username, chat=chat, tools=tools, **kwargs)
# For Streamlit, we can override or add attributes
if 'llm_chosen_backend' not in st.session_state:
st.session_state['llm_chosen_backend'] = None
if "llm_chosen_backend" not in st.session_state:
st.session_state["llm_chosen_backend"] = None
self.chatbot.chosen_backend = st.session_state["llm_chosen_backend"]
if not st.session_state["llm_chosen_backend"]:
st.session_state["llm_chosen_backend"] = self.chatbot.chosen_backend
self.chatbot.chosen_backend = st.session_state['llm_chosen_backend']
if not st.session_state['llm_chosen_backend']:
st.session_state['llm_chosen_backend'] = self.chatbot.chosen_backend
settings = self.get_settings()
print("SETTINGS:", settings)
if settings.get("use_reasoning_model", False):
self.chatbot.model = self.chatbot.get_model("reasoning")
def run(self):
# Example Streamlit run loop
@ -515,7 +561,10 @@ class StreamlitBot(Bot):
if attached_file:
if attached_file.type == "application/pdf":
import fitz
pdf_document = fitz.open(stream=attached_file.read(), filetype="pdf")
pdf_document = fitz.open(
stream=attached_file.read(), filetype="pdf"
)
pdf_text = ""
for page_num in range(len(pdf_document)):
page = pdf_document.load_page(page_num)
@ -524,7 +573,9 @@ class StreamlitBot(Bot):
elif attached_file.type in ["image/png", "image/jpeg"]:
self.chat.message_attachments = "image"
content_attachment = attached_file.read()
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")):
with st.chat_message(
"user", avatar=self.chat.get_avatar(role="user")
):
st.image(content_attachment)
with st.chat_message("user", avatar=self.chat.get_avatar(role="user")):
@ -540,28 +591,51 @@ class StreamlitBot(Bot):
self.process_user_input(text_input, content_attachment)
def get_settings(self):
return self.user_arango.db.document("settings/settings")
def process_user_input(self, user_input, content_attachment=None):
# We override to show messages in Streamlit instead of just storing
self.chat.add_message("user", user_input)
if not content_attachment:
prompt = get_tools_prompt(user_input)
print_rainbow(self.toolbot.__dict__)
response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
if response.get("tool_calls"):
bot_response = self.answer_tool_call(response, user_input)
else:
bot_response = response.content.strip('"')
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
st.write(bot_response)
with st.chat_message(
"assistant", avatar=self.chat.get_avatar(role="assistant")
):
st.write(bot_response)
else:
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
with st.chat_message(
"assistant", avatar=self.chat.get_avatar(role="assistant")
):
with st.spinner("Reading the content..."):
if self.chat.message_attachments == "image":
prompt = get_chat_prompt(user_input, role=self.chat.role, image_attachment=True)
bot_resp = self.chatbot.generate(prompt, stream=False, images=[content_attachment], model="vision")
prompt = get_chat_prompt(
user_input, role=self.chat.role, image_attachment=True
)
bot_resp = self.chatbot.generate(
prompt,
stream=False,
images=[content_attachment],
model="vision",
)
if isinstance(bot_resp, dict):
bot_resp = bot_resp.get("content", "")
elif isinstance(bot_resp, OllamaMessage):
bot_resp = bot_resp.content
st.write(bot_resp)
bot_response = bot_resp
else:
prompt = get_chat_prompt(user_input, content_attachment=content_attachment, role=self.chat.role)
prompt = get_chat_prompt(
user_input,
content_attachment=content_attachment,
role=self.chat.role,
)
response = self.chatbot.generate(prompt, stream=True)
bot_response = st.write_stream(response)
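                # st.write_stream renders the stream live and returns the concatenated text.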
@ -570,42 +644,116 @@ class StreamlitBot(Bot):
self.chat.update_in_arango()
def answer_tool_call(self, response, user_input):
bot_responses = []
for tool in response.get("tool_calls", []):
function_name = tool.function.get('name')
arguments = tool.function.arguments
arguments["query"] = user_input
with st.chat_message("assistant", avatar=self.chat.get_avatar(role="assistant")):
if function_name in [
"fetch_other_documents_tool",
"fetch_science_articles_tool",
"fetch_science_articles_and_other_documents_tool",
]:
chunks = getattr(self, function_name)(**arguments)
response_text = self.generate_from_chunks(user_input, chunks)
bot_response = st.write_stream(response_text).strip('"')
if chunks:
sources = "###### Sources:\n"
for title, group in chunks.items():
j = group["chunks"][0]["metadata"].get("journal", "No Journal")
d = group["chunks"][0]["metadata"].get("published_date", "No Date")
sources += f"[{group['article_number']}] **{title}** :gray[{j} ({d})]\n"
st.markdown(sources)
bot_response += f"\n\n{sources}"
bot_responses.append(bot_response)
elif function_name == "fetch_notes_tool":
notes = getattr(self, function_name)()
response_text = self.generate_from_notes(user_input, notes)
bot_responses.append(st.write_stream(response_text).strip('"'))
elif function_name == "conversational_response_tool":
response_text = getattr(self, function_name)(user_input)
bot_responses.append(st.write_stream(response_text).strip('"'))
# def answer_tool_call(self, response, user_input): #! This should be in the Base ChatBot?
# bot_responses = []
# for tool in response.get("tool_calls", []):
# function_name = tool.function.get("name")
# arguments = tool.function.arguments
# arguments["query"] = user_input
# print("Function name:", function_name)
# with st.chat_message(
# "assistant", avatar=self.chat.get_avatar(role="assistant")
# ):
# if function_name in [
# "fetch_other_documents_tool",
# "fetch_science_articles_tool",
# "fetch_science_articles_and_other_documents_tool",
# ]:
# chunks = getattr(self, function_name)(**arguments)
# response_text = self.generate_from_chunks(user_input, chunks)
# # Separate thinking chunk and normal chunk
# print_red("Model:", self.chatbot.model)
# if self.chatbot.model == "reasoning":
# bot_response = self.write_reasoning(response_text)
# else:
# bot_response = self.write_normal(response_text)
# bot_responses.append(bot_response)
# if chunks:
# sources = "###### Sources:\n"
# for title, group in chunks.items():
# j = group["chunks"][0]["metadata"].get(
# "journal", "No Journal"
# )
# d = group["chunks"][0]["metadata"].get(
# "published_date", "No Date"
# )
# sources += f"[{group['article_number']}] **{title}** :gray[*{j}* ({d})] \n"
# st.markdown(sources)
# bot_response += f"\n\n{sources}"
# bot_responses.append(bot_response)
# elif function_name == "fetch_notes_tool":
# notes = getattr(self, function_name)()
# response_text = self.generate_from_notes(user_input, notes)
# bot_responses.append(st.write_stream(response_text).strip('"'))
# elif function_name == "conversational_response_tool":
# response_text = getattr(self, function_name)(user_input)
# print(
# "###",
# self.chatbot.call_model,
# self.chatbot.get_model("reasoning"),
# )
# if self.chatbot.call_model == self.chatbot.get_model("reasoning"):
# print_blue("REASONING MODEL!")
# bot_response = self.write_reasoning(response_text).strip('"')
# else:
# bot_responses.append(st.write_stream(response_text))
# return "\n\n".join(bot_responses)
def write_reasoning(self, response_text):
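        # response_text is expected to yield (mode, text) tuples; a leading "thinking"
        # chunk is shown in an expander, and the remaining chunks stream as the answer.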
        chunks_iter = iter(response_text)  # ensure an iterator so we can pull the first chunk with next()
try:
first_mode, first_text = next(chunks_iter) # get first chunk
except StopIteration:
# no chunks at all
first_mode, first_text = None, None
print_purple("FIRST MODE:", first_mode, first_text)
# if it's thinking, show that in an expander
if first_mode == "thinking":
with st.expander("How the bot has been reasoning"):
st.write(first_text.replace("<think>", "").replace("</think>", ""))
# define a generator for the rest
def rest_gen():
for _, text in chunks_iter:
yield text
bot_response = st.write_stream(rest_gen())
return bot_response
return "\n\n".join(bot_responses)
else:
def full_gen():
if first_mode:
yield (first_mode, first_text)
for mode, text in chunks_iter:
yield (mode, text)
bot_response = st.write_stream(full_gen()).strip('"')
def write_normal(self, response_text):
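        # Same streaming loop as write_reasoning, but without the "thinking" expander.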
        chunks_iter = iter(response_text)  # ensure an iterator so we can pull the first chunk with next()
try:
first_mode, first_text = next(chunks_iter) # get first chunk
except StopIteration:
# no chunks at all
first_mode, first_text = None, None
def full_gen():
if first_mode:
yield (first_mode, first_text)
for mode, text in chunks_iter:
yield (mode, text)
bot_response = st.write_stream(full_gen()).strip('"')
return bot_response
def generate_from_notes(self, user_input, notes):
with st.spinner("Reading project notes..."):
@ -641,47 +789,48 @@ class StreamlitBot(Bot):
# We can show a spinner or messages too
with st.spinner("Fetching notes..."):
return super().get_notes()
class EditorBot(StreamlitBot(Bot)):
def __init__(self, chat: Chat, username: str, **kwargs):
super().__init__(chat=chat, username=username, **kwargs)
self.role = "Editor"
self.tools = ToolRegistry.get_tools()
self.chatbot = LLM(
system_message=get_editor_prompt(kwargs.get("project")),
messages=self.chat.chat_history2bot(),
chosen_backend=kwargs.get("chosen_backend"),
)
class ResearchAssistantBot(StreamlitBot(Bot)):
def __init__(self, chat: Chat, username: str, **kwargs):
super().__init__(chat=chat, username=username, **kwargs)
class EditorBot(StreamlitBot):
def __init__(self, username: str, chat: Chat, **kwargs):
super().__init__(username=username, chat=chat, **kwargs)
self.role = "Editor"
self.tools = [self.fetch_notes_tool, self.fetch_other_documents_tool]
# self.chatbot = LLM(
# system_message=get_editor_prompt(kwargs.get("project")),
# messages=self.chat.chat_history2bot(),
# chosen_backend=kwargs.get("chosen_backend"),
# )
print_purple("MODEL FOR EDITOR BOT:", self.chatbot.model)
class ResearchAssistantBot(StreamlitBot):
def __init__(self, username: str, chat: Chat, **kwargs):
super().__init__(username=username, chat=chat, **kwargs)
self.role = "Research Assistant"
self.chatbot = LLM(
system_message=get_assistant_prompt(),
temperature=0.1,
messages=self.chat.chat_history2bot(),
)
# self.chatbot = LLM(
# system_message=get_assistant_prompt(),
# temperature=0.1,
# messages=self.chat.chat_history2bot(),
# )
self.tools = [
self.fetch_science_articles_tool,
self.fetch_science_articles_and_other_documents_tool,
]
self.fetch_science_articles_tool,
self.fetch_science_articles_and_other_documents_tool,
]
class PodBot(StreamlitBot(Bot)):
class PodBot(StreamlitBot):
"""Two LLM agents construct a conversation using material from science articles."""
def __init__(
self,
username: str,
chat: Chat,
subject: str,
username: str,
instructions: str = None,
**kwargs,
):
super().__init__(chat=chat, username=username, **kwargs)
super().__init__(username=username, chat=chat, **kwargs)
self.subject = subject
self.instructions = instructions
self.guest_name = kwargs.get("name_guest", "Merit")
@ -776,19 +925,13 @@ class PodBot(StreamlitBot(Bot)):
self.chat.show_chat_history()
class HostBot(StreamlitBot(Bot)):
class HostBot(StreamlitBot):
def __init__(
self, chat: Chat, subject: str, username: str, instructions: str, **kwargs
):
super().__init__(chat=chat, username=username, **kwargs)
self.chat.role = kwargs.get("role", "Host")
self.tools = ToolRegistry.get_tools(
tools=[
self.fetch_notes_tool,
self.conversational_response_tool,
# "fetch_other_documents", #TODO Should this be included?
]
)
self.tools = [self.fetch_notes_tool, self.conversational_response_tool]
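        # The host only needs project notes and small talk; article fetching is left to the GuestBot.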
self.instructions = instructions
self.llm = LLM(
system_message=f'''
@ -801,7 +944,7 @@ class HostBot(StreamlitBot(Bot)):
"""
If the experts' answer is complicated, try to make a very brief summary of it for the audience to understand. You can also ask follow-up questions to clarify the answer, or ask for examples.
''',
messages=self.chat.chat_history2bot()
messages=self.chat.chat_history2bot(),
)
self.toolbot = LLM(
temperature=0,
@ -811,23 +954,21 @@ class HostBot(StreamlitBot(Bot)):
Often "conversational_response_tool" is enough, but sometimes project notes are needed.
Make sure to read the description of the tools carefully!""",
chat=False,
model="small",
model="tools"
)
def generate(self, query):
return self.llm.generate(query)
class GuestBot(StreamlitBot(Bot)):
class GuestBot(StreamlitBot):
def __init__(self, chat: Chat, subject: str, username: str, **kwargs):
super().__init__(chat=chat, username=username, **kwargs)
self.chat.role = kwargs.get("role", "Guest")
self.tools = ToolRegistry.get_tools(
tools=[
self.fetch_notes_tool,
self.fetch_science_articles_tool,
]
)
self.tools = [
self.fetch_notes_tool,
self.fetch_science_articles_tool,
]
self.llm = LLM(
system_message=f"""
@ -839,13 +980,13 @@ class GuestBot(StreamlitBot(Bot)):
Lists are also not recommended; instead use "for the first reason", "secondly", etc.
Instead, use "..." to indicate a pause, "-" to indicate a break in the sentence, as if you were speaking.
""",
messages=self.chat.chat_history2bot()
messages=self.chat.chat_history2bot(),
)
self.toolbot = LLM(
temperature=0,
system_message=f"You are an assistant to an expert on {subject}. Choose one or many tools to use in order to assist the expert in answering questions. Make sure to read the description of the tools carefully.",
chat=False,
model="small",
model="tools"
)
def generate(self, query):

@ -1,6 +1,7 @@
import streamlit as st
from time import sleep
from colorprinter.print_color import *
from _classes import BotChatPage
def Projects():
"""
Function to handle the Projects page.
@ -15,7 +16,8 @@ def Bot_Chat():
"""
Function to handle the Chat Bot page.
"""
from _classes import BotChatPage
print_blue("Bot Chat")
sleep(0.1)
if 'Bot Chat' not in st.session_state:
st.session_state['Bot Chat'] = {}
