Added Research etc

main
lasseedfast 8 months ago
parent 83044a905b
commit ba7eaaed2a
  1. 1
      _arango.py
  2. 102
      _base_class.py
  3. 2
      _chromadb.py
  4. 314
      _classes.py
  5. 53
      _llm.py
  6. 1328
      agent_research.py
  7. 71
      article2db.py
  8. 236
      chat_page.py
  9. 105
      collections_page.py
  10. 103
      feed_page.py
  11. 6
      ollama_response_classes.py
  12. 95
      projects_page.py
  13. 18
      prompts.py
  14. 406
      research_page.py
  15. 55
      settings_page.py
  16. 8
      streamlit_app.py
  17. 614
      streamlit_chatbot.py
  18. 50
      streamlit_pages.py
  19. 206
      test_research.py

@ -37,6 +37,7 @@ class ArangoDB:
self.db = self.client.db(db_name, username=user, password=password)
def fix_key(self, _key):
"""
Sanitize a given key by replacing all characters that are not alphanumeric,

@ -23,7 +23,6 @@ class BaseClass:
return ArangoDB()
else:
from colorprinter.print_color import print_yellow
print_yellow(f"User: {self.username}")
return ArangoDB(user=self.username, db_name=self.username)
def get_article_collections(self) -> list:
@ -81,10 +80,46 @@ class BaseClass:
class StreamlitBaseClass(BaseClass):
"""
StreamlitBaseClass is a base class for Streamlit applications that provides methods for managing user settings, session state, and user interactions with collections and projects.
Methods:
__init__(username: str, **kwargs) -> None:
Initializes the StreamlitBaseClass with a username and additional keyword arguments.
get_settings(field: str = None):
Retrieves user settings from the database. If a specific field is provided, returns the value of that field. Otherwise, returns all settings.
update_settings(key, value) -> None:
Updates a specific setting in the database and the Streamlit session state.
get_settings():
Retrieves user settings from the database.
update_session_state(page_name=None):
Updates the Streamlit session state with the attributes of the current instance. If a page name is provided, updates the session state for that page.
update_current_page(page_name):
Updates the current page in the Streamlit session state and the database.
choose_collection(text="Select a collection of favorite articles") -> str:
Displays a select box for choosing a collection of favorite articles. Updates the current collection in the session state and the database.
choose_project(text="Select a project") -> str:
Displays a select box for choosing a project. Updates the current project in the session state and the database.
"""
def __init__(self, username: str, **kwargs) -> None:
super().__init__(username, **kwargs)
def get_settings(self):
def get_settings(self, field: str = None):
"""
Retrieve or initialize user settings from the database.
This method fetches the user settings document from the "settings" collection
in the ArangoDB database. If the settings document does not exist, it initializes
it with default values for "current_collection" and "current_page". The settings
are then stored in the Streamlit session state.
Args:
field (str, optional): The specific field to retrieve from the settings.
If not provided, the entire settings document is returned.
Returns:
dict or any: The entire settings document if no field is specified,
otherwise the value of the specified field.
"""
settings = self.user_arango.db.document("settings/settings")
if not settings:
self.user_arango.db.collection("settings").insert(
@ -94,9 +129,21 @@ class StreamlitBaseClass(BaseClass):
if i not in settings:
settings[i] = None
st.session_state["settings"] = settings
if field:
return settings[field]
return settings
def update_settings(self, key, value) -> None:
"""
Update a specific setting in the database and session state.
Args:
key (str): The key of the setting to update.
value (Any): The new value for the setting.
Returns:
None
"""
self.user_arango.db.collection("settings").update_match(
filters={"_key": "settings"},
body={key: value},
@ -104,11 +151,6 @@ class StreamlitBaseClass(BaseClass):
)
st.session_state["settings"][key] = value
def get_settings(self):
return self.user_arango.db.document("settings/settings")
def update_session_state(self, page_name=None):
"""
Updates the Streamlit session state with the attributes of the current instance.
@ -135,12 +177,36 @@ class StreamlitBaseClass(BaseClass):
# print(k.upper(), v)
def update_current_page(self, page_name):
    """
    Record `page_name` as the page the user is currently viewing.

    Args:
        page_name (str): The name of the page to mark as current.

    Side Effects:
        When the "current_page" entry of the Streamlit session state differs from
        `page_name`, the session state is updated and the value is persisted via
        `update_settings`. Nothing happens when the page is already current.
    """
    # Guard clause: skip the settings write when nothing changed.
    if st.session_state.get("current_page") == page_name:
        return

    st.session_state["current_page"] = page_name
    self.update_settings("current_page", page_name)
def choose_collection(self, text="Select a collection of favorite articles") -> str:
"""
Prompts the user to select a collection of favorite articles from a list.
Args:
text (str): The prompt text to display for the selection box. Defaults to "Select a collection of favorite articles".
Returns:
str: The name of the selected collection.
Side Effects:
- Sets the `project` attribute to None.
- Sets the `collection` attribute to the selected collection.
- Updates the settings with the key "current_collection" to the selected collection.
- Updates the session state.
"""
collections = self.get_article_collections()
collection = st.selectbox(text, collections, index=None)
if collection:
@ -151,12 +217,28 @@ class StreamlitBaseClass(BaseClass):
return collection
def choose_project(self, text="Select a project") -> str:
projects = self.get_projects()
"""
Prompts the user to select a project from a list of available projects.
Args:
text (str): The prompt text to display for project selection. Defaults to "Select a project".
Returns:
str: The name of the selected project.
project = st.selectbox(text, projects, index=None)
Side Effects:
- Updates the current project settings.
- Updates the session state.
- Prints the chosen project name to the console.
"""
projects = self.get_projects()
print('projects', projects)
print(self.project_name)
project = st.selectbox(text, projects, index=projects.index(self.project_name) if self.project_name in projects else None)
print('Choosing project...')
if project:
from _classes import Project
from projects_page import Project
self.project = Project(self.username, project, self.user_arango)
self.collection = None
self.update_settings("current_project", self.project.name)

@ -69,6 +69,8 @@ class ChromaDB:
n += 1
if n > max_retries:
break
if where == {}:
where = None
r = col.query(
query_texts=query,
n_results=n_results - len(sources),

@ -1,314 +0,0 @@
import streamlit as st
from time import sleep
from datetime import datetime, timedelta
from colorprinter.print_color import *
from _base_class import StreamlitBaseClass
from _rss import RSSReader
from projects_page import Project
from streamlit_chatbot import StreamlitChat
class BotChatPage(StreamlitBaseClass):
def __init__(self, username):
    """
    Set up the page with default values, then restore any saved page state.

    Args:
        username (str): Passed on to the base class initializer.
    """
    super().__init__(username=username)

    # Defaults; any of these may be overwritten by the saved state below.
    self.page_name = "Bot Chat"
    self.role = "Research Assistant"  # Default persona
    self.collection_name = None
    self.project_name = None
    self.project: Project = None
    self.chat = None
    self.chat_key = None

    # Restore attributes stored under this page's name in the session state.
    saved_state = st.session_state.get(self.page_name, {})
    for attr_name, attr_value in saved_state.items():
        setattr(self, attr_name, attr_value)
def run(self):
    """
    Render the Bot Chat page.

    Marks the page as current, removes old unsaved chats, draws the sidebar and then
    runs a bot. Without a collection name or project a plain conversational bot is
    used; otherwise the bot is chosen from `self.role`. After the role-specific bot
    has run, the page state (collection, project, chat, role) is stored in the
    session state under `self.page_name`.
    """
    from streamlit_chatbot import EditorBot, ResearchAssistantBot, PodBot, StreamlitBot

    self.update_current_page("Bot Chat")
    self.remove_old_unsaved_chats()
    self.sidebar_actions()

    # Nothing selected: use the conversational response bot and stop here.
    if not (self.collection_name or self.project):
        print_yellow("No collection or project selected. Using conversational response bot.")
        fallback_bot = StreamlitBot(
            username=self.username,
            chat=self.get_chat(),
            tools=["conversational_response_tool"],
        )
        fallback_bot.run()
        return

    print_purple("Collection:", self.collection_name, "Project:", self.project_name)

    # Fetch the current chat, or create one if none exists yet.
    self.chat = self.get_chat(role=self.role)

    # Build the bot that matches the selected role.
    active_bot = None
    if self.role == "Research Assistant":
        print_blue("Creating Research Assistant Bot")
        active_bot = ResearchAssistantBot(
            username=self.username,
            chat=self.chat,
            collection=self.collection_name,
            project=self.project,
        )
    elif self.role == "Editor":
        active_bot = EditorBot(
            username=self.username,
            chat=self.chat,
            collection=self.collection,
            project=self.project,
        )
    elif self.role == "Podcast":
        st.session_state["make_podcast"] = True
        with st.sidebar:
            with st.form("make_podcast_form"):
                instructions = st.text_area(
                    "What should the podcast be about? Give a brief description, as if you were the producer."
                )
                start = st.form_submit_button("Make Podcast!")
        # The podcast bot is only created once the form has been submitted.
        if start:
            active_bot = PodBot(
                subject=self.project.name,
                username=self.username,
                chat=self.chat,
                collection=self.collection,
                project=self.project,
                instructions=instructions,
            )

    # Run the bot (this will display chat history and process user input)
    if active_bot:
        active_bot.run()

    # Save updated chat state to session state
    st.session_state[self.page_name] = {
        "collection": self.collection,
        "project": self.project,
        "chat": self.chat,
        "role": self.role,
    }
def get_chat(self, role="Research Assistant"):
    """
    Return the chat for the current session, creating one when needed.

    Args:
        role (str): Role given to a newly created chat. Defaults to "Research Assistant".

    Returns:
        StreamlitChat: The chat loaded from the "chats" collection when the session
        state holds a "chat_key" that still exists in the database, otherwise a new chat.

    Side Effects:
        Stores the key of a newly created chat in st.session_state["chat_key"].
    """
    print_blue('CHAT TYPE:', role)

    if 'chat_key' in st.session_state:
        print_blue("Old chat:", st.session_state['chat_key'])
        chat_data = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
        if chat_data is not None:
            return StreamlitChat.from_dict(chat_data)
        # The stored key no longer resolves to a document (remove_old_unsaved_chats
        # deletes old chats), so fall through and start a fresh chat instead of
        # handing None to from_dict.

    chat = StreamlitChat(username=self.username, role=role)
    st.session_state['chat_key'] = chat._key
    print_blue("Creating new chat:", st.session_state['chat_key'])
    return chat
def sidebar_actions(self):
    """
    Draw the sidebar controls for choosing a collection, project, role and chat.

    Side Effects:
        - Sets `self.collection` and `self.project` from the base-class choosers.
        - Sets `self.role` (selectable only when a project is chosen).
        - When the user picks an earlier chat, stores its key in
          st.session_state["chat_key"] and loads it into `self.chat`.
    """
    with st.sidebar:
        self.collection = self.choose_collection(
            "Article collection to use for chat:"
        )
        self.project = self.choose_project("Project to use for chat:")

        if self.collection or self.project:
            st.write("---")
            if self.project:
                self.role = st.selectbox(
                    "Choose Bot Role",
                    options=["Research Assistant", "Editor", "Podcast"],
                    index=0,
                )
            else:
                self.role = "Research Assistant"

            # Load existing chats from the database. The project takes precedence
            # over the collection. Bind variables keep the selected value out of
            # the query text, so quotes in a name cannot break or alter the query.
            if self.project:
                filter_field, filter_value = "project", f"{self.project}"
            else:
                filter_field, filter_value = "collection", f"{self.collection}"
            chat_history = list(
                self.user_arango.db.aql.execute(
                    'FOR doc IN chats FILTER doc[@field] == @value '
                    'RETURN {"_key": doc["_key"], "name": doc["name"]}',
                    bind_vars={"field": filter_field, "value": filter_value},
                )
            )

            chats = {i["name"]: i["_key"] for i in chat_history}
            selected_chat = st.selectbox(
                "Continue another chat", options=[""] + list(chats.keys()), index=0
            )
            if selected_chat:
                st.session_state["chat_key"] = chats[selected_chat]
                self.chat = self.get_chat()

    # Fall back to the default persona. The original used `==` here, which compared
    # instead of assigning and therefore never set the role.
    if not self.role:
        self.role = "Research Assistant"
def remove_old_unsaved_chats(self):
    """
    Delete unsaved chats whose `last_updated` is more than two weeks old.

    Side Effects:
        Removes the matching documents from the "chats" collection and prints the
        `_id` of each deleted chat.
    """
    # `last_updated` is compared as an ISO-8601 string, as in the original query.
    cutoff = (datetime.now() - timedelta(weeks=2)).isoformat()

    # One filtered query only; the cutoff is passed as a bind variable rather than
    # being interpolated into the query text.
    stale_chats = self.user_arango.db.aql.execute(
        "FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < @cutoff RETURN doc",
        bind_vars={"cutoff": cutoff},
    )

    chats_collection = self.user_arango.db.collection("chats")
    for chat in stale_chats:
        print_red(chat["_id"])
        chats_collection.delete(chat["_key"])
class SettingsPage(StreamlitBaseClass):
    """Settings page: profile picture upload and the reasoning-model toggle."""

    def __init__(self, username: str):
        """Initialize the page for `username` via the base class."""
        super().__init__(username=username)

    def run(self):
        """Mark the page as current and render both settings sections."""
        self.update_current_page("Settings")
        self.set_profile_picture()
        self.use_reasoning_model()

    def set_profile_picture(self):
        """
        Render the profile picture uploader and store an uploaded image.

        Side Effects:
            An uploaded image is shrunk with `Image.thumbnail((64, 64))`, saved under
            the user's `user_data` folder and its path is stored in the "avatar" setting.
        """
        st.markdown("Profile picture")
        uploaded_file = st.file_uploader(
            "Upload profile picture", type=["png", "jpg", "jpeg"]
        )
        if not uploaded_file:
            return

        # Shrink the image so that it fits within 64x64 pixels.
        from PIL import Image

        picture = Image.open(uploaded_file)
        picture.thumbnail((64, 64))
        target_path = f"user_data/{st.session_state['username']}/profile_picture.png"
        picture.save(target_path)
        self.update_settings("avatar", target_path)
        st.success("Profile picture uploaded")
        sleep(1)

    def use_reasoning_model(self):
        """
        Render the checkbox that toggles the reasoning model for chat responses.

        The current settings are read first; a missing "use_reasoning_model" entry is
        treated as False. The checkbox value is then written back to the settings.

        Returns:
            None
        """
        current_settings = self.get_settings()
        current_settings.setdefault("use_reasoning_model", False)

        st.markdown("Use Reasoning Model")
        is_enabled = st.checkbox(
            "Use Reasoning Model",
            value=current_settings["use_reasoning_model"],
            help="Use the reasoning model to generate responses in chats. This may take longer to process.",
        )
        self.update_settings("use_reasoning_model", is_enabled)
class RSSFeedsPage(StreamlitBaseClass):
def __init__(self, username: str):
    """
    Create the RSS page with its reader and restore any saved page state.

    Args:
        username (str): Passed to the base class and to the RSSReader.
    """
    super().__init__(username=username)
    self.page_name = "RSS Feeds"
    self.reader = RSSReader(username=username)

    # Initialize attributes from session state if available
    saved_state = st.session_state.get(self.page_name, {})
    for attr_name, attr_value in saved_state.items():
        setattr(self, attr_name, attr_value)
def run(self):
    """
    Render the RSS page: selected feed first, then the sidebar controls.

    Side Effects:
        Ensures st.session_state["selected_feed"] exists, marks the page as current
        and stores the page's attributes through `update_session_state`.
    """
    # display_feed() reads this key, so it must exist before rendering.
    if "selected_feed" not in st.session_state:
        st.session_state["selected_feed"] = None

    self.update_current_page(self.page_name)
    self.display_feed()
    self.sidebar_actions()
    self.update_session_state(page_name=self.page_name)
def select_rss_feeds(self):
    """
    Let the user pick one of their stored RSS feeds in the sidebar.

    Side Effects:
        - Writes a notice when the reader returns no feeds.
        - When a different feed is picked, stores its `_key` in
          st.session_state["selected_feed"] and triggers a rerun so the feed is shown.
    """
    rss_feeds = self.reader.get_rss_feeds()
    if not rss_feeds:
        st.write("You have no RSS feeds added.")
        return

    feed_options = [feed["title"] for feed in rss_feeds]
    with st.sidebar:
        st.subheader("Show your feeds")
        selected_feed_title = st.selectbox(
            "Select a feed", options=feed_options, index=None
        )
    if not selected_feed_title:
        return

    # First feed with the chosen title; the title comes from feed_options, so a
    # match always exists.
    selected_key = next(
        feed["_key"] for feed in rss_feeds if feed["title"] == selected_feed_title
    )

    # Rerun only when the selection changed. The select box keeps its value across
    # reruns, so an unconditional st.rerun() here would restart the script forever.
    if st.session_state.get("selected_feed") != selected_key:
        st.session_state["selected_feed"] = selected_key
        st.rerun()
def search_feeds(self, rss_url):
    """
    Discover RSS feeds at `rss_url` while showing a spinner.

    Args:
        rss_url (str): Address handed to the reader's `discover_feeds`.

    Side Effects:
        Stores a non-empty result in st.session_state["discovered_feeds"]; shows an
        error message when nothing was found.
    """
    with st.spinner("Discovering feeds..."):
        discovered = self.reader.discover_feeds(rss_url)
        if not discovered:
            st.error("No RSS feeds found at the provided URL.")
            return
        st.session_state["discovered_feeds"] = discovered
def sidebar_actions(self):
    """
    Draw the sidebar: feed selection, feed discovery, preview and adding a feed.

    Side Effects:
        - Ensures st.session_state["discovered_feeds"] exists.
        - Submitting the form stores the discovered feeds in the session state.
        - "Add RSS Feed" calls the reader's `add_rss_feed` with the chosen URL, clears
          the discovered feeds and triggers a rerun.
    """
    # NOTE(review): nesting reconstructed from flattened source — confirm that the
    # discovered-feed controls belong inside the sidebar.
    if "discovered_feeds" not in st.session_state:
        st.session_state["discovered_feeds"] = None

    with st.sidebar:
        self.select_rss_feeds()

        st.subheader("Add a New RSS Feed")
        with st.form("add_rss_feed"):
            rss_url = st.text_input("Website URL or RSS Feed URL")
            submitted = st.form_submit_button("Discover Feeds")
            if submitted:
                print_green(rss_url)
                st.session_state["discovered_feeds"] = self.reader.discover_feeds(rss_url)

        candidates = st.session_state["discovered_feeds"]
        if not candidates:
            return

        st.subheader("Select a Feed to Add")
        labels = [f"{feed['title']} ({feed['href']})" for feed in candidates]
        chosen_label = st.selectbox("Available Feeds", options=labels)
        selected_feed_url = candidates[labels.index(chosen_label)]["href"]

        if st.button("Preview Feed"):
            preview = self.reader.parse_feed(selected_feed_url)
            st.write(f"{preview.title}")
            st.write(f"_{self.reader.html_to_markdown(preview.description)}_")
            # Show at most the first five entries of the previewed feed.
            for entry in preview.entries[:5]:
                with st.expander(entry["title"]):
                    summary = entry.get("summary", "No summary available")
                    st.markdown(self.reader.html_to_markdown(summary))

        print_yellow(selected_feed_url)
        was_added = st.button(
            "Add RSS Feed",
            on_click=self.reader.add_rss_feed,
            args=[selected_feed_url],
        )
        if was_added:
            del st.session_state["discovered_feeds"]
            st.success("RSS Feed added.")
            st.rerun()
def display_feed(self):
    """
    Show the feed stored in st.session_state["selected_feed"], if any.

    Loads the feed through the reader, then renders its title, its description and
    up to five entries, each in an expander with its summary and a "Read more" link.
    """
    if not st.session_state["selected_feed"]:
        return

    self.reader.get_feed(st.session_state["selected_feed"])
    st.title(self.reader.feed.title)
    st.write(f"_{self.reader.feed.description}_")

    latest_entries = self.reader.feed.entries[:5]
    for entry in latest_entries:
        with st.expander(entry["title"]):
            raw_summary = entry.get("summary", "No summary available")
            st.markdown(self.reader.html_to_markdown(raw_summary))
            st.markdown(f"[Read more]({entry['link']})")

@ -35,6 +35,7 @@ class LLM:
chosen_backend (str): The chosen backend server for the API.
client (Client): The client for synchronous API calls.
async_client (AsyncClient): The client for asynchronous API calls.
tools (list): List of tools to be used in generating the response.
Methods:
__init__(self, system_message, temperature, model, max_length_answer, messages, chat, chosen_backend):
@ -75,6 +76,7 @@ class LLM:
messages: list[dict] = None,
chat: bool = True,
chosen_backend: str = None,
tools: list = None,
) -> None:
"""
Initialize the assistant with the given parameters.
@ -112,23 +114,28 @@ class LLM:
"X-Chosen-Backend": self.chosen_backend,
}
self.host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/")
self.client: Client = Client(host=self.host_url, headers=headers)
self.host_url = 'http://192.168.1.12:3300' #! Change back when possible
self.client: Client = Client(host=self.host_url, headers=headers, timeout=120)
self.async_client: AsyncClient = AsyncClient()
def get_credentials(self):
    """
    Build the Base64-encoded "user:password" credential string.

    The user is read from the LLM_API_USER environment variable and the password
    from LLM_API_PWD_LASSE.

    Returns:
        str: Base64 text of "<user>:<password>".
    """
    user = os.getenv('LLM_API_USER')
    password = os.getenv('LLM_API_PWD_LASSE')
    raw_credentials = f"{user}:{password}".encode()
    return base64.b64encode(raw_credentials).decode()
def get_model(self, model_alias):
    """
    Resolve a model alias to the model name configured in the environment.

    Args:
        model_alias (str): One of "standard", "small", "vision", "standard_64k",
            "reasoning" or "tools". Any other value falls back to the standard model.

    Returns:
        str | None: The value of the matching environment variable, or None when
        that variable is not set.
    """
    # Alias -> name of the environment variable holding the model name.
    # "standard_64k" appeared twice in the original literal; the later entry
    # (LLM_MODEL_LARGE) was the one that took effect, so only it is kept.
    env_var_by_alias = {
        "standard": "LLM_MODEL",
        "small": "LLM_MODEL_SMALL",
        "vision": "LLM_MODEL_VISION",
        "standard_64k": "LLM_MODEL_LARGE",
        "reasoning": "LLM_MODEL_REASONING",
        "tools": "LLM_MODEL_TOOLS",
    }
    # Pure lookup: the statements that followed the first `return` were unreachable
    # and have been dropped, so `self` is not touched.
    return os.getenv(env_var_by_alias.get(model_alias, "LLM_MODEL"))
def count_tokens(self):
num_tokens = 0
@ -165,6 +172,7 @@ class LLM:
] = None,
temperature: float = None,
messages: list[dict] = None,
format = None
):
"""
Generate a response based on the provided query and context.
@ -178,6 +186,7 @@ class LLM:
model (Optional[Literal["small", "standard", "vision", "tools"]]): The model type to be used.
temperature (float): The temperature setting for the model.
messages (list[dict]): List of previous messages in the conversation.
format (Optional[BaseModel]): The format of the response.
Returns:
str: The generated response or an error message if an exception occurs.
@ -218,29 +227,31 @@ class LLM:
headers["X-Model-Type"] = "small"
if model == self.get_model("tools"):
headers["X-Model-Type"] = "tools"
elif model == self.get_model("reasoning"):
headers["X-Model-Type"] = "reasoning"
# Prepare options
options = Options(**self.options)
options.temperature = temperature
# Adjust the options for long messages
if self.chat or len(self.messages) > 15000 and model != self.get_model("tools"):
num_tokens = self.count_tokens() + self.max_length_answer // 2
if num_tokens > 8000:
model = self.get_model("standard_64k")
print_purple("Switching to large model")
headers["X-Model-Type"] = "large"
#TODO This is a bit of a hack to get the reasoning model to work. It should be handled better.
# # Adjust the options for long messages
# if self.chat or len(self.messages) > 15000 and model != self.get_model("tools"):
# num_tokens = self.count_tokens()
# if num_tokens > 8000:
# model = self.get_model("standard_64k")
# print_purple("Switching to large model")
# headers["X-Model-Type"] = "large"
# Call the client.chat method
try:
print('###########')
self.call_model = model
print()
print('Headers:', headers)
print_yellow('Model:', model)
print()
headers['X-Chosen-Backend'] = 'backend_tools_server'
self.client: Client = Client(host=self.host_url, headers=headers)
self.client: Client = Client(host=self.host_url, headers=headers, timeout=300) #!
#print_rainbow(self.client._client.__dict__)
print_yellow("Model used in call:", model)
# if headers:
# self.client.headers.update(headers)
response = self.client.chat(
model=model,
messages=self.messages,
@ -248,6 +259,7 @@ class LLM:
stream=stream,
options=options,
keep_alive=3600 * 24 * 7,
format=format
)
except ResponseError as e:
@ -279,6 +291,8 @@ class LLM:
# Process the response
if isinstance(response, ChatResponse):
result = response.message.content.strip('"')
if '</think>' in result:
result = result.split('</think>')[-1]
self.messages.append(
{"role": "assistant", "content": result.strip('"')}
)
@ -446,10 +460,6 @@ class LLM:
headers["X-Model-Type"] = "large"
# Call the async client's chat method
print()
print_rainbow(self.async_client.__dict__)
print(model, headers, )
print()
try:
response = await self.async_client.chat(
model=model,
@ -479,6 +489,7 @@ class LLM:
self.messages[0]["content"] += system_message_info
self.messages[-1] = {"role": "user", "content": user_input}
print_red(self.async_client.last_response.headers.get("X-Chosen-Backend", "No backend"))
# Update chosen_backend
if model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]:
self.chosen_backend = self.async_client.last_response.headers.get(

File diff suppressed because it is too large Load Diff

@ -29,15 +29,15 @@ class Document:
def __init__(
self,
pdf_file=None,
filename: str=None,
doi: str=None,
username: str=None,
is_sci: bool=None,
is_image: bool=False,
text: str=None,
_key: str=None,
arango_db_name: str=None,
arango_collection: str=None,
filename: str = None,
doi: str = None,
username: str = None,
is_sci: bool = None,
is_image: bool = False,
text: str = None,
_key: str = None,
arango_db_name: str = None,
arango_collection: str = None,
):
self.filename = filename
self.pdf_file = pdf_file
@ -63,11 +63,10 @@ class Document:
if self.pdf_file:
self.open_pdf(self.pdf_file)
def make_summary_in_background(self):
if not self._id and all([self.arango_collection, self._key]):
self._id = f"{self.arango_collection}/{self._key}"
if not self._id:
return
data = {
@ -281,8 +280,19 @@ class Processor:
self.arango_collection = arango_collection
return arango_collection
def extract_doi(self, text, multi=False):
"""
Extracts the DOI (Digital Object Identifier) from the given text.
Args:
text (str): The text from which to extract the DOI.
multi (bool, optional): If True, extract multiple DOIs from the text. Defaults to False.
Returns:
str or list or None:
- If multi is False, returns the extracted DOI as a string if found, otherwise None.
- If multi is True, returns a list of extracted DOIs if found, otherwise None.
"""
doi_pattern = r"10\.\d{4,9}/[-._;()/:A-Za-z0-9]+"
if multi:
@ -297,7 +307,7 @@ class Processor:
if self.get_crossref(doi):
self.document.metadata = self.get_crossref(doi)
self.document.doi = doi
else:
elif self.document.pdf:
for page in self.document.pdf.pages(0, 6):
text = page.get_text()
if re.search(doi_pattern, text):
@ -316,18 +326,20 @@ class Processor:
I want you to find the DOI of the article. Ansewer ONLY with the DOI, nothing else.
If you can't find the DOI, answer "not_found".
'''
st.write('Trying to extract DOI from text using LLM...')
doi = llm.generate(prompt).replace('https://doi.org/', '')
st.write("Trying to extract DOI from text using LLM...")
doi = llm.generate(prompt).replace("https://doi.org/", "")
if doi == "not_found":
return None
else:
doi = re.search(doi_pattern, doi).group()
break
else:
print_yellow(f"DOI not extracted: {doi}")
return doi
else:
return None
def chunks2chroma(self, _id, key):
st.write("Adding to vector database...")
assert self.document.text, "Document must have 'text' attribute."
@ -442,7 +454,11 @@ class Processor:
)
arango_document["metadata"] = self.document.metadata
arango_document["summary"] = {
"text_sum": self.document.metadata["abstract"],
"text_sum": (
self.document.metadata["abstract"]["text_sum"]
if "text_sum" in self.document.metadata["abstract"]
else self.document.metadata["abstract"]
),
"meta": {"model": "from_metadata"},
}
@ -608,7 +624,6 @@ class Processor:
if not self.document.is_sci:
self.document.is_sci = bool(self.document.metadata)
arango_collection = self.get_arango()
doc = arango_collection.get(self.document._key) if self.document.doi else None
@ -624,21 +639,22 @@ class Processor:
"title": self.document.get_title(only_meta=True)
}
elif 'title' not in doc['metadata']:
self.document.doc["metadata"]["title"] = self.document.get_title(only_meta=True)
elif "title" not in doc["metadata"]:
self.document.doc["metadata"]["title"] = self.document.get_title(
only_meta=True
)
if "user_access" not in doc or doc['user_access'] == None:
if "user_access" not in doc or doc["user_access"] == None:
self.document.doc["user_access"] = [self.document.username]
else:
if self.document.username not in doc['user_access']:
if self.document.username not in doc["user_access"]:
self.document.doc["user_access"] = doc.get("user_access", []) + [
self.document.username
]
self.metadata = self.document.doc["metadata"]
arango_collection.update(self.document.doc)
return doc["_id"], arango_collection.db_name, self.document.doi
else:
self.document.doc = (
{"doi": self.document.doi, "_key": fix_key(self.document.doi)}
@ -665,6 +681,8 @@ class Processor:
only_meta=True
)
if "_key" not in self.document.doc:
if not self.document.metadata:
self.document.metadata = {}
if self.document.doi:
_key = self.document.doi
@ -672,7 +690,10 @@ class Processor:
_key = self.document.title
elif self.document.get_title():
_key = self.document.get_title()
elif 'title' in self.document.doc["metadata"] and self.document.doc["metadata"]["title"]:
elif (
"title" in self.document.doc["metadata"]
and self.document.doc["metadata"]["title"]
):
_key = self.document.doc["metadata"]["title"]
else:
_key = self.document.pdf_file.name

@ -0,0 +1,236 @@
import streamlit as st
from datetime import datetime, timedelta
from colorprinter.print_color import *
from _base_class import StreamlitBaseClass
from _rss import RSSReader
from projects_page import Project
from streamlit_chatbot import StreamlitChat, StreamlitBot
class BotChatPage(StreamlitBaseClass):
"""
BotChatPage - A Streamlit interface for chatting with various AI assistants.
This class provides a user interface for interacting with different types of AI bots
(Research Assistant, Editor, Podcast) that can access and work with user's collections
and projects.
Attributes:
username (str): The username of the current user.
collection_name (str): Name of the selected collection.
project_name (str): Name of the selected project.
project (Project): Project instance the chat is associated with.
chat (StreamlitChat): Chat instance for maintaining conversation history.
role (str): The selected bot persona, default is "Research Assistant".
page_name (str): Name of the current page ("Bot Chat").
chat_key (str): Unique identifier for the current chat session.
bot (StreamlitBot): Instance of the selected bot type.
Methods:
run(): Main method to render the chat interface and handle interactions.
get_chat(role, new_chat): Retrieves existing chat or creates a new one.
sidebar_actions(): Renders sidebar elements for selecting collections, projects, and chat options.
remove_old_unsaved_chats(): Cleans up unsaved chats older than two weeks.
"""
def __init__(self, username):
    """
    Set up the page with default values, then restore any saved page state.

    Args:
        username (str): Passed on to the base class initializer.
    """
    super().__init__(username=username)

    # Defaults; any of these may be overwritten by the saved state below.
    self.page_name = "Bot Chat"
    self.role = "Research Assistant"  # Default persona
    self.collection_name = None
    self.project_name = None
    self.project: Project = None
    self.chat = None
    self.chat_key = None
    self.bot: StreamlitBot = None

    # Restore attributes stored under this page's name in the session state.
    saved_state = st.session_state.get(self.page_name, {})
    for attr_name, attr_value in saved_state.items():
        setattr(self, attr_name, attr_value)
def run(self):
    """
    Render the Bot Chat page and run the bot for the current selection.

    Marks the page as current, removes old unsaved chats and draws the sidebar.
    Without a collection name or project a plain conversational bot is run.
    Otherwise the bot is chosen from `self.role`, the page state (collection,
    project, chat, role) is stored in the session state under `self.page_name`,
    and the bot is run.
    """
    from streamlit_chatbot import EditorBot, ResearchAssistantBot, PodBot, StreamlitBot

    self.bot = None
    self.update_current_page("Bot Chat")
    self.remove_old_unsaved_chats()
    self.sidebar_actions()

    # NOTE(review): this gate and the Research Assistant bot read
    # `self.collection_name`, while sidebar_actions() assigns `self.collection` —
    # confirm which attribute is intended.
    if not (self.collection_name or self.project):
        print_yellow("No collection or project selected. Using conversational response bot.")
        self.bot = StreamlitBot(
            username=self.username,
            chat=self.get_chat(),
            tools=["conversational_response_tool"],
        )
        self.bot.run()
        return

    print_purple("Collection:", self.collection_name, "Project:", self.project_name)

    # Fetch the current chat, or create one if none exists yet.
    self.chat = self.get_chat(role=self.role)

    # Create a Bot instance with the Chat object
    if self.role == "Research Assistant":
        print_blue("Creating Research Assistant Bot")
        self.bot = ResearchAssistantBot(
            username=self.username,
            chat=self.chat,
            collection=self.collection_name,
            project=self.project,
            tools=[
                "fetch_other_documents_tool",
                "fetch_science_articles_tool",
                "fetch_science_articles_and_other_documents_tool",
                "conversational_response_tool",
            ],
        )
    elif self.role == "Editor":
        self.bot = EditorBot(
            username=self.username,
            chat=self.chat,
            collection=self.collection,
            project=self.project,
            tools=[
                "fetch_other_documents_tool",
                "fetch_notes_tool",
                "conversational_response_tool",
            ],
        )
    elif self.role == "Podcast":
        st.session_state["make_podcast"] = True
        with st.sidebar:
            with st.form("make_podcast_form"):
                instructions = st.text_area(
                    "What should the podcast be about? Give a brief description, as if you were the producer."
                )
                start = st.form_submit_button("Make Podcast!")
        if start:
            # Assign to self.bot: the original bound a local `bot`, so the podcast
            # bot was created but never reached the `self.bot.run()` call below.
            self.bot = PodBot(
                subject=self.project.name,
                username=self.username,
                chat=self.chat,
                collection=self.collection,
                project=self.project,
                instructions=instructions,
            )

    # Save updated chat state to session state
    st.session_state[self.page_name] = {
        "collection": self.collection,
        "project": self.project,
        "chat": self.chat,
        "role": self.role,
    }

    # Run the bot (this will display chat history and process user input)
    if self.bot:
        self.bot.run()
def get_chat(self, role="Research Assistant", new_chat=False):
    """
    Retrieve the chat of the current session or create a new one.

    Args:
        role (str): Role given to a newly created chat. Defaults to "Research Assistant".
        new_chat (bool): If True, always create a new chat. Defaults to False.

    Returns:
        StreamlitChat: The chat loaded from the "chats" collection when `new_chat` is
        False and the session state holds a "chat_key" that still exists in the
        database; otherwise a newly created chat.

    Side Effects:
        Stores the key of a newly created chat in st.session_state["chat_key"].
    """
    print_blue('CHAT TYPE:', role)

    if not new_chat and 'chat_key' in st.session_state:
        print_blue("Old chat:", st.session_state['chat_key'])
        chat_data = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
        if chat_data is not None:
            return StreamlitChat.from_dict(chat_data)
        # The stored key no longer resolves to a document (remove_old_unsaved_chats
        # deletes old chats), so fall through and start a fresh chat instead of
        # handing None to from_dict.

    # Single creation path for "new chat requested", "no key yet" and "stale key".
    chat = StreamlitChat(username=self.username, role=role)
    st.session_state['chat_key'] = chat._key
    print_blue("Creating new chat:", st.session_state['chat_key'])
    return chat
def sidebar_actions(self):
    """
    Render the chat sidebar: collection/project selection and chat settings.

    The first form lets the user pick an article collection and/or a project.
    The second form offers a bot role (projects only), lists earlier chats
    stored for the selected project or collection, and starts either the
    selected chat or a new one.

    Side effects: sets self.collection, self.project, self.role and, when
    "Start Chat" is pressed, self.chat and st.session_state["chat_key"],
    followed by a rerun of the script.
    """
    # NOTE(review): the original indentation of this method was lost; the
    # nesting below is a reconstruction. The "Start Chat" button is placed
    # at form level because every st.form needs a submit button.
    with st.sidebar:
        with st.form("select_chat"):
            self.collection = self.choose_collection("Article collection to use for chat:")
            self.project = self.choose_project("Project to use for chat:")
            submitted = st.form_submit_button("Select Collection/Project")

        with st.form("chat_settings"):
            # Defaults so the names exist even when nothing is selected.
            chats = {}
            selected_chat = None

            if submitted or any([self.collection, self.project]):
                filter_field = None
                filter_value = None
                if self.project:
                    self.role = st.selectbox(
                        "Choose Bot Role",
                        options=["Research Assistant", "Editor", "Podcast"],
                        index=0,
                    )
                    filter_field, filter_value = "project", self.project
                elif self.collection:
                    self.role = "Research Assistant"
                    filter_field, filter_value = "collection", self.collection

                # Load existing chats from the database. Bind variables are
                # used instead of string interpolation so that names
                # containing quotes cannot break or alter the query.
                if filter_field is not None:
                    chat_history = self.user_arango.db.aql.execute(
                        "FOR doc IN chats FILTER doc[@field] == @value "
                        'RETURN {"_key": doc["_key"], "name": doc["name"]}',
                        bind_vars={"field": filter_field, "value": str(filter_value)},
                    )
                    chats = {doc["name"]: doc["_key"] for doc in chat_history}

                selected_chat = st.selectbox(
                    "Continue another chat", options=[""] + list(chats.keys()), index=None
                )

            # Fall back to the default role (the original compared with
            # `==` here, which assigned nothing).
            if not getattr(self, "role", None):
                self.role = "Research Assistant"

            start_chat = st.form_submit_button("Start Chat")

        if start_chat:
            if selected_chat:
                st.session_state["chat_key"] = chats[selected_chat]
                self.chat = self.get_chat()
            else:
                self.chat = self.get_chat(role=self.role, new_chat=True)
            st.rerun()
def remove_old_unsaved_chats(self):
    """
    Delete unsaved chats whose last update is more than two weeks old.

    A chat is removed when its `saved` field is false and its `last_updated`
    value sorts before the ISO-8601 timestamp of "now minus two weeks".
    """
    # NOTE(review): the comparison is a string comparison inside AQL, so it
    # assumes last_updated is stored as an ISO-8601 string — confirm.
    cutoff = (datetime.now() - timedelta(weeks=2)).isoformat()
    query = (
        "FOR doc IN chats "
        "FILTER doc.saved == false AND doc.last_updated < @cutoff "
        "RETURN doc"
    )
    print_blue(query, cutoff)

    # One filtered query only; the earlier unfiltered "FOR doc IN chats
    # RETURN doc" was debug residue that loaded every chat for nothing.
    old_chats = self.user_arango.db.aql.execute(query, bind_vars={"cutoff": cutoff})
    chats_collection = self.user_arango.db.collection("chats")
    for chat in old_chats:
        print_red(chat["_id"])
        chats_collection.delete(chat["_key"])

@ -11,7 +11,7 @@ from colorprinter.print_color import *
class ArticleCollectionsPage(StreamlitBaseClass):
def __init__(self, username: str):
super().__init__(username=username)
self.collection = None
self.collection = self.get_settings()["current_collection"]
self.page_name = "Article Collections"
# Initialize attributes from session state if available
@ -49,6 +49,7 @@ class ArticleCollectionsPage(StreamlitBaseClass):
def choose_collection(self):
collections = self.get_article_collections()
current_collection = self.collection
print_yellow(f"Current collection: {current_collection}")
preselected = (
collections.index(current_collection)
if current_collection in collections
@ -143,26 +144,33 @@ class ArticleCollectionsPage(StreamlitBaseClass):
)
collection_articles += list(cursor)
# Sort articles by title
# Filter out None values and sort articles by title
collection_articles = sorted(
collection_articles,
key=lambda x: x.get("metadata", {}).get("title", "No Title"),
[article for article in collection_articles if article is not None],
key=lambda x: (
x.get("metadata", {}).get("title", "No Title")
if x.get("metadata") is not None
else "No Title"
),
)
if collection_articles:
st.markdown(f"#### Articles in *{self.collection}*:")
for article in collection_articles:
if article is None:
continue
metadata = article.get("metadata")
if metadata is None:
continue
title = metadata.get("title", "No Title").strip()
journal = metadata.get("journal", "No Journal").strip()
published_year = metadata.get("published_year", "No Year")
published_date = metadata.get("published_date", None)
language = metadata.get("language", "No Language")
metadata = article.get("metadata", {})
if metadata:
title = metadata.get("title", "No Title").strip()
journal = metadata.get("journal", "No Journal").strip()
published_year = metadata.get("published_year", "No Year")
published_date = metadata.get("published_date", None)
language = metadata.get("language", "No Language")
else:
title = "No Title"
journal = "No Journal"
published_year = "No Year"
published_date = None
language = "No Language"
icon = country_emojis.get(language.upper(), "") if language else ""
expander_title = f"**{title}** *{journal}* ({published_year}) {icon}"
@ -202,9 +210,7 @@ class ArticleCollectionsPage(StreamlitBaseClass):
# Let the user add notes to the article, if it's not a scientific article
# if not article._id.startswith("sci_articles"):
if "user_notes" in article and article["user_notes"]:
st.markdown(
f":blue[**Your notes:**]"
)
st.markdown(f":blue[**Your notes:**]")
note_number = 0
for note in article["user_notes"]:
note_number += 1
@ -212,17 +218,20 @@ class ArticleCollectionsPage(StreamlitBaseClass):
with c1:
st.markdown(f":blue[{note}]")
with c2:
st.button(key=f'{article["_key"]}_{note_number}',
st.button(
key=f'{article["_key"]}_{note_number}',
label=f":red[Delete note]",
on_click=self.delete_article_note,
args=(article, note),
)
with st.form(f"add_info_form_{article['_id']}", clear_on_submit=True):
with st.form(
f"add_info_form_{article['_id']}", clear_on_submit=True
):
new_info = st.text_area(
":blue[Add a note about the article]",
key=f'new_info_{article["_id"]}',
help="Add information such as what kind of article it is, what it's about, who's the author, etc.",
help="Add information such as what kind of article it is, what it's about, who's the author, etc.",
)
submitted = st.form_submit_button(":blue[Add note]")
if submitted:
@ -234,7 +243,38 @@ class ArticleCollectionsPage(StreamlitBaseClass):
on_click=self.delete_article,
args=(self.collection, article["_id"]),
)
# Add info button and form
st.markdown(":grey[Change metadata]")
with st.form(f"update_metadata_form_{article['_id']}", clear_on_submit=True):
new_title = st.text_input(
":blue[Update title]",
key=f'new_metadata_{article["_id"]}_title',
help="Update the title of the article.",
)
new_author = st.text_input(
":blue[Update author]",
key=f'new_metadata_{article["_id"]}_author',
help="Update the author of the article.",
)
new_journal = st.text_input(
":blue[Update journal]",
key=f'new_metadata_{article["_id"]}_journal',
help="Update the journal of the article.",
)
new_published_year = st.text_input(
":blue[Update published year]",
key=f'new_metadata_{article["_id"]}_published_year',
help="Update the published year of the article.",
)
submitted_metadata = st.form_submit_button(":blue[Add info]")
if submitted_metadata:
for info in ['new_title', 'new_author', 'new_journal', 'new_published_year']:
if info:
self.update_article(article, "metadata", info)
else:
st.write("No articles in this collection.")
@ -389,10 +429,25 @@ class ArticleCollectionsPage(StreamlitBaseClass):
self.update_session_state(page_name=self.page_name)
def update_article(self, article, field, value):
"Update a field in an article document"
"""
Update a specified field in an article with a new value.
If the field already exists and is a list, the new value is appended to the list.
If the field exists but is not a list, the field is converted to a list containing
the old and new values. If the field does not exist, it is created as a list with
the new value.
Args:
article (dict): The article to be updated.
field (str): The field in the article to be updated.
value (str): The new value to be added to the field.
Returns:
None
"""
value = str(value.strip())
print(value)
print(type(value))
if field in article:
if isinstance(article[field], list):
article[field].append(value)
@ -409,4 +464,4 @@ class ArticleCollectionsPage(StreamlitBaseClass):
if "user_notes" in article and note in article["user_notes"]:
article["user_notes"].remove(note)
self.user_arango.db.update_document(article, check_rev=False, silent=True)
sleep(0.1)
sleep(0.1)

@ -0,0 +1,103 @@
from _rss import RSSReader
import streamlit as st
from _base_class import StreamlitBaseClass
from colorprinter.print_color import *
class RSSFeedsPage(StreamlitBaseClass):
    """
    Streamlit page for browsing the user's RSS feeds and adding new ones.

    The main area shows the first entries of the selected feed; the sidebar
    lets the user pick one of their feeds, discover feeds at a URL, preview
    a discovered feed and add it.

    Session state keys used:
        "selected_feed": key of the feed to display, or None.
        "discovered_feeds": feeds found by the last discovery, or None.
    """

    def __init__(self, username: str):
        super().__init__(username=username)
        self.page_name = "RSS Feeds"
        self.reader = RSSReader(username=username)
        # Initialize attributes from session state if available
        for k, v in st.session_state.get(self.page_name, {}).items():
            setattr(self, k, v)

    def run(self):
        """Render the page: selected feed first, then the sidebar controls."""
        if "selected_feed" not in st.session_state:
            st.session_state["selected_feed"] = None
        self.update_current_page(self.page_name)
        self.display_feed()
        self.sidebar_actions()
        self.update_session_state(page_name=self.page_name)

    def select_rss_feeds(self):
        """Show a selectbox with the user's feeds and remember the chosen one."""
        rss_feeds = self.reader.get_rss_feeds()
        if not rss_feeds:
            st.write("You have no RSS feeds added.")
            return

        feed_options = [feed["title"] for feed in rss_feeds]
        with st.sidebar:
            st.subheader("Show your feeds")
            selected_feed_title = st.selectbox(
                "Select a feed", options=feed_options, index=None
            )
            if selected_feed_title:
                selected_key = next(
                    feed["_key"]
                    for feed in rss_feeds
                    if feed["title"] == selected_feed_title
                )
                # Rerun only when the selection actually changed. The
                # selectbox keeps its value across reruns, so an
                # unconditional st.rerun() here would restart the script
                # on every run and never finish rendering.
                if selected_key != st.session_state.get("selected_feed"):
                    st.session_state["selected_feed"] = selected_key
                    st.rerun()

    def search_feeds(self, rss_url):
        """Discover feeds at rss_url and store them in session state, or show an error."""
        with st.spinner("Discovering feeds..."):
            feeds = self.reader.discover_feeds(rss_url)
            if feeds:
                st.session_state["discovered_feeds"] = feeds
            else:
                st.error("No RSS feeds found at the provided URL.")

    def sidebar_actions(self):
        """Render the sidebar: feed selection, feed discovery, preview and add."""
        if "discovered_feeds" not in st.session_state:
            st.session_state["discovered_feeds"] = None

        with st.sidebar:
            self.select_rss_feeds()

            st.subheader("Add a New RSS Feed")
            with st.form("add_rss_feed"):
                rss_url = st.text_input("Website URL or RSS Feed URL")
                submitted = st.form_submit_button("Discover Feeds")
                if submitted:
                    print_green(rss_url)
                    feeds = self.reader.discover_feeds(rss_url)
                    st.session_state["discovered_feeds"] = feeds

            # Plain buttons are not allowed inside a form, so the preview/add
            # controls live outside it.
            if st.session_state["discovered_feeds"]:
                st.subheader("Select a Feed to Add")
                feeds = st.session_state["discovered_feeds"]
                feed_options = [f"{feed['title']} ({feed['href']})" for feed in feeds]
                selected_feed = st.selectbox("Available Feeds", options=feed_options)
                selected_feed_url = feeds[feed_options.index(selected_feed)]["href"]

                if st.button("Preview Feed"):
                    feed = self.reader.parse_feed(selected_feed_url)
                    st.write(f"{feed.title}")
                    description = self.reader.html_to_markdown(feed.description)
                    st.write(f"_{description}_")
                    for entry in feed.entries[:5]:
                        with st.expander(entry["title"]):
                            summary = entry.get("summary", "No summary available")
                            markdown_summary = self.reader.html_to_markdown(summary)
                            st.markdown(markdown_summary)

                print_yellow(selected_feed_url)
                # The feed is stored by the on_click callback; the branch
                # below then clears the discovery list and refreshes the page.
                if st.button(
                    "Add RSS Feed",
                    on_click=self.reader.add_rss_feed,
                    args=[selected_feed_url],
                ):
                    del st.session_state["discovered_feeds"]
                    st.success("RSS Feed added.")
                    st.rerun()

    def display_feed(self):
        """Show title, description and the first five entries of the selected feed."""
        if st.session_state["selected_feed"]:
            self.reader.get_feed(st.session_state["selected_feed"])
            st.title(self.reader.feed.title)
            st.write(f"_{self.reader.feed.description}_")
            for entry in self.reader.feed.entries[:5]:
                with st.expander(entry["title"]):
                    summary = entry.get("summary", "No summary available")
                    markdown_summary = self.reader.html_to_markdown(summary)
                    st.markdown(markdown_summary)
                    st.markdown(f"[Read more]({entry['link']})")

@ -0,0 +1,6 @@
from pydantic import BaseModel
# Pydantic model with two required string fields: a query for the vector
# database and a short explanation.
# NOTE(review): presumably used as the structured-output schema when an LLM
# builds a vector-database query — confirm against callers.
class QueryResponse(BaseModel):
# Query text to run against the vector database.
query_to_vector_database: str
# Short explanation accompanying the query.
short_explanation: str

@ -20,15 +20,13 @@ from prompts import get_note_summary_prompt, get_image_system_prompt
import env_manager
env_manager.set_env()
print_green("Environment variables set.")
class ProjectsPage(StreamlitBaseClass):
def __init__(self, username: str):
super().__init__(username=username)
self.projects = []
self.selected_project_name = None
self.project = None
self.project = self.get_settings("current_project")
self.page_name = "Projects"
# Initialize attributes from session state if available
@ -52,9 +50,11 @@ class ProjectsPage(StreamlitBaseClass):
def display_projects(self):
with st.sidebar:
self.new_project_button()
projects = [proj["name"] for proj in self.projects]
self.selected_project_name = st.selectbox(
"Select a project to manage",
options=[proj["name"] for proj in self.projects],
index=projects.index(self.project) if self.project in projects else None,
)
if self.selected_project_name:
self.project = Project(
@ -136,10 +136,12 @@ class ProjectsPage(StreamlitBaseClass):
if interviews:
for interview in interviews:
st.markdown(f'_{interview.get("timestamp", "")}_')
st.markdown(
f"**Interviewees:** {', '.join(interview['intervievees'])}"
)
st.markdown(f"**Interviewer:** {interview['interviewer']}")
if interview['intervievees']:
st.markdown(
f"**Interviewees:** {', '.join(interview['intervievees'])}"
)
if interview['interviewer']:
st.markdown(f"**Interviewer:** {interview['interviewer']}")
if len(interview["transcript"].split("\n")) > 6:
preview = (
" \n".join(interview["transcript"].split("\n")[:6])
@ -261,7 +263,7 @@ class ProjectsPage(StreamlitBaseClass):
def upload_interview_form(self):
with st.expander("Upload interview"):
with st.form("add_interview", clear_on_submit=True):
interview = st.file_uploader("Upload interview audio file")
interview = st.file_uploader("Upload interview audio file or transcript")
interviewees = st.text_input(
"Enter the names of the interviewees, separated by commas"
)
@ -398,6 +400,7 @@ class Project(StreamlitBaseClass):
def load_project(self):
print_blue("Project name:", self.name)
project_cursor = self.user_arango.db.aql.execute(
"FOR doc IN projects FILTER doc.name == @name RETURN doc",
bind_vars={"name": self.name},
@ -415,6 +418,7 @@ class Project(StreamlitBaseClass):
self.settings = project.get("settings", {})
self.notes_summary = project.get("notes_summary", "")
def update_project(self):
updated_doc = {
"_key": self._key,
@ -492,9 +496,27 @@ class Project(StreamlitBaseClass):
document_type="interview",
is_image=False,
)
elif interview.type in ["plain/text"]:
# TODO Implement text file processing
pass
elif interview.type in ["application/json", "text/plain"]:
import json
print_purple("JSON file processing")
interview_content = interview.getvalue().decode("utf-8")
print('Content:', interview_content)
interview_json = json.loads(interview_content)
formated_transcription = self.format_json_transcription(interview_json)
self.add_interview_transcript(
formated_transcription,
interview.name,
intervievees=None,
interviewer=None,
date_of_interveiw=None
)
else:
print(interview.type)
st.error("Unsupported file type")
st.stop()
st.rerun()
def add_interview_transcript(
self,
@ -504,7 +526,6 @@ class Project(StreamlitBaseClass):
interviewer: str = None,
date_of_interveiw: datetime.date = None,
):
print_yellow(transcript)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
_key = fix_key(f"{filename}_{timestamp}")
if intervievees:
@ -515,7 +536,7 @@ class Project(StreamlitBaseClass):
interviewer = self.username
if not self.user_arango.db.has_collection("interviews"):
self.user_arango.db.create_collection("interviews")
if date_of_interveiw:
if isinstance(date_of_interveiw, str):
date_of_interveiw = datetime.strptime(date_of_interveiw, "%Y-%m-%d")
from article2db import Document
@ -529,8 +550,7 @@ class Project(StreamlitBaseClass):
username=self.username,
arango_collection="interviews",
)
print_rainbow(document.__dict__)
print(document.text)
document.make_chunks(len_chunks=600)
self.user_arango.db.collection("interviews").insert(
@ -619,6 +639,32 @@ class Project(StreamlitBaseClass):
timestamp = None
return "\n".join(transcript)
def format_json_transcription(self, transcription: "dict | list") -> str:
    """
    Convert a parsed JSON transcription into plain text, one line per segment.

    Two layouts are recognised:
      * a list of segments, each with "timestamp" and "text"
        (the JSON format in MacWhisper);
      * a dict with a "lines" list, each line with "startTime" and "text"
        (the DOT format in MacWhisper).
    In both layouts an optional "speaker" is placed before the text.

    Args:
        transcription: The decoded JSON document.

    Returns:
        Lines of the form "[time] speaker: text" or "[time] text", joined
        with newlines. Empty string when there are no segments or the
        layout is not recognised.

    Raises:
        KeyError: If a segment lacks its time key or "text".
    """
    if isinstance(transcription, list):
        segments, time_key = transcription, "timestamp"
    elif isinstance(transcription, dict):
        segments, time_key = transcription.get("lines", []), "startTime"
    else:
        segments, time_key = [], "timestamp"

    transcript = []
    for segment in segments:
        speaker = segment.get("speaker", None)
        timestamp = segment[time_key]
        text = segment["text"]
        if speaker:
            transcript.append(f"[{timestamp}] {speaker}: {text}")
        else:
            transcript.append(f"[{timestamp}] {text}")
    return "\n".join(transcript)
def delete_note(self, note_id):
if note_id in self.notes:
self.notes.remove(note_id)
@ -727,7 +773,7 @@ class Project(StreamlitBaseClass):
return None
try:
page = wikipedia.page(page_name)
page = wikipedia.page(page_name, auto_suggest=False)
data = {
"title": page.title,
"summary": page.summary,
@ -747,7 +793,7 @@ class Project(StreamlitBaseClass):
)
if wiki_data.get("summary"):
query = f'''Summarize the text below. It's from a Wikipedia page about {wiki_data["title"]}. \n\n"""{wiki_data['summary']}"""\nMake a detailed and concise summary of the text.'''
summary = llm.generate(query)
summary = llm.generate(query).content
wiki_data["text"] = (
f"(_Summarised using AI, read original [here]({wiki_url})_)\n{summary}"
)
@ -759,12 +805,15 @@ class Project(StreamlitBaseClass):
self.add_note(wiki_data)
processor = PDFProcessor(process=False)
dois = [
processor.extract_doi(ref)
for ref in wiki_data.get("references", [])
if processor.extract_doi(ref)
]
if dois:
dois = []
print_rainbow(wiki_data.get("references", []))
for ref in wiki_data.get("references", []):
doi = processor.extract_doi(ref)
if doi:
print_blue("Found DOI:", doi)
dois.append(doi)
if len(dois) > 0:
current_collection = st.session_state["settings"].get("current_collection")
st.markdown(
f"Found {len(dois)} references with DOI numbers. Do you want to add them to {current_collection}?"

@ -139,7 +139,8 @@ def get_query_builder_system_message():
Take the user input and write it as a sentence that could be used as a query for a vector database.
The vector database will return text snippets that semantically match the query, so you CAN'T USE NEGATIONS or other complex language constructs. If there is a negation in the user input, exclude that part from the query.
If the user input seems to be a follow-up question or comment, use the context from the chat history to make a relevant query.
Answer ONLY with the query, no explanation or reasoning!
Remember that the query is meant to return information on a specific topic, so make sure the query is focused on that topic. Don't make a query to search for tools or methods (if it's not information about a specific method, e.g. a scientific method), only for actual information.
Answer ONLY with the query, NO explanation or reasoning!
"""
return re.sub(r"\s*\n\s*", "\n", system_message)
@ -171,8 +172,8 @@ def get_image_system_prompt(project):
def get_tools_prompt(user_input):
prompt = f'''User message: "{user_input}"
You have to choose one or many tools in order to answer the message. It's important that you think of what information (if any) is needed to make a good answer.
Make sure to read the description of the tools carefully before choosing!
You have to choose one or many tools in order to answer the message. It's important that you think of what information is needed to make a good answer.
Make sure to read the description of the tools carefully before choosing! E.g. chose the conversational response tool ONLY if the user is small talking or asking, use other tools if the user is asking a question or want information.
You can ONLY chose a tool you are provided with, don't make up a tool!
You HAVE TO CHOOSE A TOOL, even if you think you can answer without it. Don't answer the question without choosing a tool.
'''
@ -203,10 +204,11 @@ def get_summary_prompt(text, is_sci):
def get_generate_vector_query_prompt(user_input: str, role: str):
    """
    Build the prompt asking the model to write a vector-database query.

    Args:
        user_input: The latest user (or interview partner) message.
        role: Role of the bot. "Guest" and "Host" get interview-specific
            wording; every other role gets the generic wording.

    Returns:
        The prompt text, always ending with a reminder to follow the
        earlier instructions (appended exactly once).
    """
    # Earlier branches that assigned a query and were then overwritten have
    # been removed; the resulting prompt per role is unchanged apart from the
    # corrected typo ("actial" -> "actual") in the Guest wording.
    if role == "Guest":
        query = f"""A podcast host has asked this question in an interview: "{user_input}". Generate a query for the vector database to answer the actual question."""
    elif role == "Host":
        query = f"""An expert has stated: "{user_input}". Generate a query for the vector database to get context for that answer in order to come up with a new question."""
    else:
        query = f"""A user asked this question: "{user_input}". Generate a query for the vector database"""
    query += "\nMake sure to follow the instructions you got earlier!"
    return query

@ -0,0 +1,406 @@
import streamlit as st
from datetime import datetime
from colorprinter.print_color import *
from _base_class import StreamlitBaseClass
from projects_page import Project
from agent_research import ResearchReport, MasterAgent, StructureAgent, ToolAgent, ArchiveAgent, process_step
import os
import json
class ResearchPage(StreamlitBaseClass):
"""
ResearchPage - A Streamlit interface for deep research using AI agents.
This class provides a user interface for conducting in-depth research using
multiple specialized AI agents working together. It allows users to input
research questions, track progress, and view detailed research reports.
Attributes:
username (str): The username of the current user.
project_name (str): Name of the selected project.
project (Project): Project instance the research is associated with.
page_name (str): Name of the current page ("Research").
research_state (dict): Dictionary tracking the current state of research.
report (ResearchReport): Instance for tracking research progress and results.
Methods:
run(): Main method to render the research interface and handle interactions.
sidebar_actions(): Renders sidebar elements for selecting projects and research options.
start_new_research(): Initiates a new research session.
view_saved_reports(): Displays a list of saved research reports.
display_report(): Renders a research report in the Streamlit interface.
show_research_progress(): Displays the current research progress.
"""
def __init__(self, username):
super().__init__(username=username)
self.project_name = None
self.project = None
self.page_name = "Research"
# Research state tracking
self.research_state = {
"in_progress": False,
"completed": False,
"question": None,
"started_at": None,
"report": None,
"current_step": None,
"steps_completed": 0,
"total_steps": 0
}
self.report = None
# Initialize attributes from session state if available
if self.page_name in st.session_state:
for k, v in st.session_state[self.page_name].items():
setattr(self, k, v)
# Create reports directory if it doesn't exist
os.makedirs(f"/home/lasse/sci/reports", exist_ok=True)
def run(self):
self.update_current_page("Research")
self.sidebar_actions()
st.title("Deep Research")
if not self.project:
st.warning("Please select a project to start researching.")
return
# Main interface
if self.research_state["in_progress"]:
self.show_research_progress()
elif self.research_state["completed"]:
self.display_report(self.research_state["report"])
else:
# Input for new research
st.subheader(f"New Research for Project: {self.project_name}")
with st.form("research_form"):
question = st.text_area("Enter your research question:",
help="Be specific about what you want to research. Complex questions will be broken down into sub-questions.")
start_button = st.form_submit_button("Start Research")
if start_button and question:
self.start_new_research(question)
st.rerun()
# Option to view saved reports
with st.expander("View Saved Reports"):
self.view_saved_reports()
def sidebar_actions(self):
with st.sidebar:
with st.form("select_project"):
self.project = self.choose_project("Project for research:")
submitted = st.form_submit_button("Select Project")
if submitted and self.project:
self.project_name = self.project.name
st.success(f"Selected project: {self.project_name}")
if self.research_state["in_progress"]:
st.info(f"Research in progress: {self.research_state['question']}")
if st.button("Cancel Research"):
self.research_state["in_progress"] = False
st.rerun()
elif self.research_state["completed"]:
if st.button("Start New Research"):
self.research_state["completed"] = False
self.research_state["report"] = None
st.rerun()
def start_new_research(self, question):
"""Initiates a new research session with the given question"""
self.research_state["question"] = question
self.research_state["in_progress"] = True
self.research_state["completed"] = False
self.research_state["started_at"] = datetime.now().isoformat()
# Initialize the research report
self.report = ResearchReport(
question=question,
username=self.username,
project_name=self.project_name
)
# Save current state
st.session_state[self.page_name] = {
"project_name": self.project_name,
"project": self.project,
"research_state": self.research_state,
"report": self.report
}
# Start a new thread to run the research process
# In a production environment, you might want to use a background job
# For now, we'll run it in the main thread with streamlit spinner
with st.spinner("Research in progress... This may take several minutes."):
try:
# Initialize agents
master_agent = MasterAgent(
username=self.username,
project=self.project,
report=self.report,
chat=True
)
structure_agent = StructureAgent(
username=self.username,
model="small",
report=self.report
)
tool_agent = ToolAgent(
username=self.username,
model="tools",
system_message="You are an assistant with tools. Always choose a tool to help with the task.",
report=self.report,
project=self.project,
chat=True
)
archive_agent = ArchiveAgent(
username=self.username,
report=self.report,
project=self.project,
system_message="You are an assistant specialized in reading and summarizing research information.",
chat=True
)
# Track the research state in the master agent
master_agent.research_state["original_question"] = question
# Execute the research workflow
# 1. Create research plan
st.text("Creating research plan...")
research_plan = master_agent.make_plan(question)
self.report.log_plan(research_plan)
# 2. Structure the plan
st.text("Structuring research plan...")
structured_plan = structure_agent.make_structured(research_plan, question)
self.report.log_plan(research_plan, structured_plan.model_dump())
# Update total steps count
self.research_state["total_steps"] = len(structured_plan.steps)
# 3. Execute the plan step by step
execution_results = {}
for step_name, tasks in structured_plan.steps.items():
st.text(f"Processing step: {step_name}")
self.research_state["current_step"] = step_name
self.research_state["steps_completed"] += 1
# Collect all task descriptions in this step
step_tasks = [
{"task_name": task_name, "task_description": task_description}
for task_name, task_description in tasks
]
# Process the entire step
step_result = process_step(
step_name, step_tasks, master_agent, tool_agent, archive_agent
)
execution_results[step_name] = step_result
# 4. Evaluate if more steps are needed
st.text("Evaluating research plan...")
plan_evaluation = master_agent.evaluate_plan(execution_results)
self.report.log_plan_evaluation(plan_evaluation)
# 5. Write the final report
st.text("Writing final report...")
final_report = master_agent.write_report(execution_results)
self.report.log_final_report(final_report)
# 6. Save the reports
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
report_path = f"/home/lasse/sci/reports/research_report_{self.username}_{timestamp}"
# Save JSON report
json_path = f"{report_path}.json"
with open(json_path, "w") as f:
json.dump(self.report.get_full_report(), f, indent=2)
# Save markdown report
markdown_report = self.report.get_markdown_report()
markdown_path = f"{report_path}.md"
with open(markdown_path, "w") as f:
f.write(markdown_report)
# Update research state
self.research_state["in_progress"] = False
self.research_state["completed"] = True
self.research_state["report"] = {
"json_path": json_path,
"markdown_path": markdown_path,
"report_data": self.report.get_full_report(),
"markdown_content": markdown_report
}
except Exception as e:
st.error(f"An error occurred during research: {str(e)}")
import traceback
st.code(traceback.format_exc())
self.research_state["in_progress"] = False
# Update session state
st.session_state[self.page_name] = {
"project_name": self.project_name,
"project": self.project,
"research_state": self.research_state,
"report": self.report
}
def view_saved_reports(self):
"""Displays a list of saved research reports"""
reports_dir = "/home/lasse/sci/reports"
if not os.path.exists(reports_dir):
st.info("No saved reports found.")
return
# Get all report files
json_files = [f for f in os.listdir(reports_dir) if f.endswith('.json') and f.startswith('research_report')]
if not json_files:
st.info("No saved reports found.")
return
for file in sorted(json_files, reverse=True):
file_path = os.path.join(reports_dir, file)
try:
with open(file_path, 'r') as f:
report_data = json.load(f)
# Extract basic info
question = report_data.get("metadata", {}).get("question", "Unknown question")
project = report_data.get("metadata", {}).get("project_name", "No project")
started_at = report_data.get("metadata", {}).get("started_at", "Unknown time")
# Format the date
try:
date_obj = datetime.fromisoformat(started_at)
date_str = date_obj.strftime("%Y-%m-%d %H:%M")
except:
date_str = started_at
# Create an expandable section for each report
st.markdown(f"_{question} ({project} - {date_str})_")
st.markdown(f"**Project:** {project}")
st.markdown(f"**Date:** {date_str}")
# Button to view full report
if st.button("View Full Report", key=f"view_{file}"):
# Load corresponding markdown file if it exists
md_file = file.replace('.json', '.md')
md_path = os.path.join(reports_dir, md_file)
if os.path.exists(md_path):
with open(md_path, 'r') as f:
markdown_content = f.read()
else:
markdown_content = None
self.research_state["completed"] = True
self.research_state["report"] = {
"json_path": file_path,
"markdown_path": md_path if os.path.exists(md_path) else None,
"report_data": report_data,
"markdown_content": markdown_content
}
st.rerun()
except Exception as e:
st.error(f"Error loading report {file}: {str(e)}")
def display_report(self, report_data):
"""Renders a research report in the Streamlit interface"""
if not report_data:
st.warning("No report data available.")
return
st.title("Research Report")
# Get report data
markdown_content = report_data.get("markdown_content")
json_data = report_data.get("report_data")
if markdown_content:
# Display the markdown report
st.markdown(markdown_content)
elif json_data:
# Fallback to displaying JSON data in a more readable format
question = json_data.get("metadata", {}).get("question", "Unknown question")
st.header(f"Research on: {question}")
# Display metadata
st.subheader("Metadata")
metadata = json_data.get("metadata", {})
st.markdown(f"**Project:** {metadata.get('project_name', 'None')}")
st.markdown(f"**Started:** {metadata.get('started_at', 'Unknown')}")
st.markdown(f"**Finished:** {metadata.get('finished_at', 'Unknown')}")
# Display final report
st.subheader("Research Findings")
st.markdown(json_data.get("final_report", "No final report available."))
# Display steps
st.subheader("Research Steps")
steps = json_data.get("steps", {})
for step_name, step_data in steps.items():
with st.expander(step_name):
st.markdown(f"**Summary:** {step_data.get('summary', 'No summary available.')}")
# Display tools used
st.markdown("**Tools used:**")
for tool in step_data.get("tools_used", []):
st.markdown(f"- {tool.get('tool', 'Unknown tool')} with query: _{tool.get('args', {}).get('query', 'No query')}_")
else:
st.error("No report content available to display.")
# Download buttons
col1, col2 = st.columns(2)
with col1:
if report_data.get("markdown_path") and os.path.exists(report_data["markdown_path"]):
with open(report_data["markdown_path"], "r") as f:
markdown_content = f.read()
st.download_button(
label="Download as Markdown",
data=markdown_content,
file_name=os.path.basename(report_data["markdown_path"]),
mime="text/markdown"
)
with col2:
if report_data.get("json_path") and os.path.exists(report_data["json_path"]):
with open(report_data["json_path"], "r") as f:
json_content = f.read()
st.download_button(
label="Download as JSON",
data=json_content,
file_name=os.path.basename(report_data["json_path"]),
mime="application/json"
)
def show_research_progress(self):
    """Displays the current research progress.

    Reads ``self.research_state`` and renders the question, a progress bar,
    the current step and two notices. The keys ``question``, ``total_steps``
    and ``steps_completed`` must be present; ``current_step`` is optional and
    falls back to "Planning".
    """
    state = self.research_state
    st.subheader("Research in Progress")
    st.markdown(f"**Question:** {state['question']}")

    # Show progress bar
    progress = 0.0
    total_steps = state["total_steps"]
    if total_steps > 0:
        progress = state["steps_completed"] / total_steps
    # st.progress only accepts floats in [0.0, 1.0]; clamp so that a counter
    # running past the planned number of steps cannot crash the page.
    st.progress(min(max(progress, 0.0), 1.0))

    # Show current step
    current_step = state.get("current_step", "Planning")
    st.markdown(f"**Current step:** {current_step}")
    st.info("Research is ongoing. This may take several minutes depending on the complexity of the question.")
    st.warning("Please do not navigate away from this page while research is in progress.")

@ -0,0 +1,55 @@
import streamlit as st
from time import sleep
from colorprinter.print_color import *
from _base_class import StreamlitBaseClass
class SettingsPage(StreamlitBaseClass):
    """Streamlit page where the user manages personal settings.

    The page offers two controls: uploading a profile picture and toggling
    whether the reasoning model is used for chat responses.
    """

    def __init__(self, username: str):
        super().__init__(username=username)

    def run(self):
        """Register the page as current and render both settings widgets."""
        self.update_current_page("Settings")
        self.set_profile_picture()
        self.use_reasoning_model()

    def set_profile_picture(self):
        """Let the user upload an avatar and store it as a PNG thumbnail.

        The uploaded image is shrunk to fit within 64x64 pixels, saved as
        ``user_data/<username>/profile_picture.png`` and the path is stored
        under the "avatar" setting.
        """
        st.markdown("Profile picture")
        profile_picture = st.file_uploader(
            "Upload profile picture", type=["png", "jpg", "jpeg"]
        )
        if not profile_picture:
            return

        from pathlib import Path

        from PIL import Image

        img = Image.open(profile_picture)
        # thumbnail() shrinks in place to fit inside 64x64 and keeps the
        # aspect ratio, so the result is not necessarily square.
        img.thumbnail((64, 64))
        # PNG cannot encode every mode a JPEG may come in (e.g. CMYK), so
        # convert anything unsupported before saving.
        if img.mode not in ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"):
            img = img.convert("RGB")
        img_path = f"user_data/{st.session_state['username']}/profile_picture.png"
        # The user's data directory may not exist yet on a first upload.
        Path(img_path).parent.mkdir(parents=True, exist_ok=True)
        img.save(img_path)
        self.update_settings("avatar", img_path)
        st.success("Profile picture uploaded")
        sleep(1)

    def use_reasoning_model(self):
        """
        Displays a checkbox in the Streamlit interface to enable or disable the reasoning model for generating responses in chats.

        Retrieves the current settings and checks if the "use_reasoning_model" key exists. If not, it initializes it to False.
        Then, it displays a markdown text and a checkbox for the user to toggle the reasoning model usage.
        The setting is written back only when it was missing or when the user changed it,
        so an ordinary rerun of the page does not trigger a write.

        Returns:
            None
        """
        settings = self.get_settings()
        had_setting = "use_reasoning_model" in settings
        if not had_setting:
            settings["use_reasoning_model"] = False
        stored_value = settings["use_reasoning_model"]
        st.markdown("Use Reasoning Model")
        use_reasoning_model = st.checkbox("Use Reasoning Model", value=stored_value, help="Use the reasoning model to generate responses in chats. This may take longer to process.")
        if not had_setting or use_reasoning_model != stored_value:
            self.update_settings("use_reasoning_model", use_reasoning_model)

@ -59,7 +59,8 @@ if st.session_state["authentication_status"]:
Bot_Chat,
Projects,
Settings,
RSS_Feeds
RSS_Feeds,
Research
)
break
@ -85,8 +86,11 @@ if st.session_state["authentication_status"]:
article_collections = st.Page(Article_Collections)
settings = st.Page(Settings)
rss_feeds = st.Page(RSS_Feeds)
research = st.Page(Research)
sleep(0.1)
pg = st.navigation([bot_chat, projects, article_collections, rss_feeds, settings])
pg = st.navigation([bot_chat, projects, article_collections, research, rss_feeds, settings])
sleep(0.1)
pg.run()
# try: #TODO Use this when in production

@ -5,15 +5,70 @@ from _llm import LLM
from prompts import *
from colorprinter.print_color import *
from ollama._types import Message as OllamaMessage
from ollama._types import ChatResponse as OllamaChatResponse
from projects_page import Project
from ollama_response_classes import QueryResponse
class Chat(StreamlitBaseClass):
def __init__(self, username=None, role=None, key=None, **kwargs):
"""
A class to represent a chat session in a Streamlit application.
Attributes:
-----------
name : str
The name of the chat.
chat_history : list
A list to store the chat history.
role : str
The role of the user in the chat.
project : str
The project associated with the chat.
collection : str
The collection associated with the chat.
_key : str
The unique key for the chat.
Methods:
--------
add_message(role, content):
Adds a message to the chat history.
to_dict():
Converts the chat object to a dictionary.
update_in_arango():
Updates the chat object in the ArangoDB.
set_name(user_input):
Sets the name of the chat based on user input.
show_title(title=None):
Displays the title of the chat in the Streamlit application.
from_dict(data):
Creates a Chat object from a dictionary.
chat_history2bot(n_messages=None, remove_system=False):
Converts the chat history to a format suitable for a bot.
"""
def __init__(
    self,
    username=None,
    role=None,
    key=None,
    project=None,
    collection=None,
    **kwargs,
):
    """Create a chat.

    Args:
        username: Passed on to the base class initialiser.
        role: Stored as ``self.role``.
        key: Stored as ``self._key``.
        project: Stored as ``self.project``.
        collection: Stored as ``self.collection``.
        **kwargs: Forwarded to the base class initialiser. ``name`` and
            ``chat_history`` are additionally read from here.
    """
    super().__init__(username=username, **kwargs)
    self.name = kwargs.get("name", None)
    self.chat_history = kwargs.get("chat_history", [])
    self.role = role
    # ``project`` and ``collection`` are named parameters, so Python binds
    # them directly and they can never show up inside ``kwargs``.
    self.project = project
    self.collection = collection
    self._key = key
def add_message(self, role, content):
if isinstance(content, str):
@ -71,6 +126,21 @@ class Chat(StreamlitBaseClass):
self.name = name
return name
def show_title(self, title=None):
    """Render the chat heading.

    The heading text is the first truthy value among ``title``,
    ``self.project`` and ``self.collection``; when all are falsy the text
    "No title" is used.
    """
    heading = title or self.project or self.collection or "No title"
    st.markdown(
        f"""### Chat about *{heading.strip()}* with *{self.role}*""",
    )
@classmethod
def from_dict(cls, data):
return cls(
@ -96,6 +166,27 @@ class Chat(StreamlitBaseClass):
class StreamlitChat(Chat):
'''
A class to manage chat interactions within a Streamlit application.
Inherits from the Chat class and provides additional functionality to handle
chat history, user roles, and avatars within a Streamlit app context.
Attributes:
project (str): The project associated with the chat.
collection (str): The collection associated with the chat.
message_attachments (None): Placeholder for message attachments.
last_updated (str): Timestamp of the last update in ISO format.
_key (str): Unique identifier for the chat.
role (str): The role of the user in the chat.
username (str): The username of the user in the chat.
name (str): The name of the chat.
chat_history (list): List of messages in the chat history.
Methods:
show_chat_history():
get_avatar(message: dict = None, role=None) -> str:
'''
def __init__(self, username: str, role: str, _key: str = None, **kwargs):
super().__init__(username, role, _key, **kwargs)
self.project = kwargs.get("project", None)
@ -123,6 +214,15 @@ class StreamlitChat(Chat):
)["_key"]
def show_chat_history(self):
"""
Displays the chat history in the Streamlit app.
Iterates through the chat history and displays messages from the user and assistant.
Messages from other roles are ignored. Each message is displayed with an avatar.
Returns:
None
"""
for message in self.chat_history:
if message["role"] not in ["user", "assistant"]:
continue
@ -132,6 +232,19 @@ class StreamlitChat(Chat):
st.markdown(message["content"].strip('"'))
def get_avatar(self, message: dict = None, role=None) -> str:
"""
Retrieves the avatar image path based on the message or role provided.
Args:
message (dict, optional): A dictionary containing message details, including the role.
role (str, optional): The role of the user if the message is not provided.
Returns:
str: The file path to the avatar image.
Raises:
AssertionError: If neither message nor role is provided.
"""
assert message or role, "Either message or role must be provided"
if message and message.get("role", None) == "user" or role == "user":
avatar = st.session_state["settings"].get("avatar", "user")
@ -153,7 +266,38 @@ class StreamlitChat(Chat):
avatar = None
return avatar
class Bot(BaseClass):
'''
A chatbot class that integrates with research tools and document retrieval systems.
The Bot class provides an interface for conversational AI that can access and process
various document sources, including scientific articles, user notes, and other documents.
It initializes multiple specialized language models for different tasks, including
regular conversation, query generation, and tool selection.
Attributes:
username (str): The username associated with this bot instance.
chat (Chat): Chat instance for managing conversation history.
project (Project, optional): Associated project for document context.
collection (list, optional): Collections of documents to search within.
arango_ids (list): List of document IDs in ArangoDB.
chatbot (LLM): Main language bot for conversation.
helperbot (LLM): Bot for generating queries.
toolbot (LLM): Bot for selecting appropriate tools.
tools (list): List of tool functions available to the bot.
Methods:
initiate_bots(): Initialize the different language model instances.
get_chunks(): Retrieve relevant text chunks based on user input.
answer_tool_call(): Process and execute tool calls from the AI.
generate_from_notes(): Generate a response from user notes.
generate_from_chunks(): Generate a response from document chunks.
run(): Run the bot (implemented by subclasses).
get_notes(): Retrieve notes from the database.
fetch_science_articles_tool(): Retrieve scientific articles.
fetch_other_documents_tool(): Retrieve non-scientific documents.
fetch_science_articles_and_other_documents_tool(): Retrieve both document types.
fetch_notes_tool(): Retrieve user notes.
conversational_response_tool(): Generate a simple conversational response.
'''
def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
super().__init__(username=username, **kwargs)
# Use the passed in chat or create a new Chat
@ -175,7 +319,6 @@ class Bot(BaseClass):
self.helperbot = None
self.toolbot = None
self.initiate_bots()
if self.collection:
for c in self.collection:
for _id in self.user_arango.db.aql.execute(
@ -189,21 +332,24 @@ class Bot(BaseClass):
):
self.arango_ids.append(_id)
# Map tool names to functions
tool_mapping = {
"fetch_other_documents_tool": self.fetch_other_documents_tool,
"fetch_science_articles_tool": self.fetch_science_articles_tool,
"fetch_science_articles_and_other_documents_tool": self.fetch_science_articles_and_other_documents_tool,
"fetch_notes_tool": self.fetch_notes_tool,
"conversational_response_tool": self.conversational_response_tool,
}
# Convert tool names to function references
if tools:
self.tools = [tool_mapping[tool] if isinstance(tool, str) else tool for tool in tools]
# Map tool names to functions
tool_mapping = {
"fetch_other_documents_tool": self.fetch_other_documents_tool,
"fetch_science_articles_tool": self.fetch_science_articles_tool,
"fetch_science_articles_and_other_documents_tool": self.fetch_science_articles_and_other_documents_tool,
"fetch_notes_tool": self.fetch_notes_tool,
"conversational_response_tool": self.conversational_response_tool,
}
self.tools = [
tool_mapping[tool] if isinstance(tool, str) else tool for tool in tools
]
else:
self.tools = None
self.initiate_bots()
# Store other kwargs
for arg in kwargs:
setattr(self, arg, kwargs[arg])
@ -215,6 +361,23 @@ class Bot(BaseClass):
# print_red(f"Error initiating bots: {e}")
def initiate_bots(self):
"""
Initialize the different bot instances used in the chatbot application.
Creates three types of bots:
1. chatbot: A standard LLM for normal conversation with the user
2. helperbot: A specialized LLM with low temperature for generating concise queries or prompts
3. toolbot: A specialized LLM for selecting which tool to use when responding to user queries
(only created if tools are provided)
The toolbot is configured to prefer specialized tools over conversational responses
when the user is seeking information rather than engaging in small talk.
Note:
- The chatbot uses the full chat history
- The helperbot uses a limited chat history (last 4 messages) with system message removed
- The toolbot uses a system message that lists all available tools
"""
# A standard LLM for normal chat
self.chatbot = LLM(messages=self.chat.chat_history2bot())
# A helper bot for generating queries or short prompts
@ -226,18 +389,24 @@ class Bot(BaseClass):
messages=self.chat.chat_history2bot(n_messages=4, remove_system=True),
)
# A specialized LLM picking which tool to use
self.toolbot = LLM(
temperature=0,
system_message="""
You are an helpful assistant with some tools.
Your task is to choose one or multiple tools to answering a user's query.
ALWAYS choose one or more of the provided tools.
DON'T come up with your own tools, only use the ones provided.
""",
# system_message='Use one of the provided tools to help the answering bot to answer the user. Do not answer directly. Use the "tool_calls" field in your answer.',
chat=False,
model="tools"
)
if self.tools:
tools_names = [tool.__name__ for tool in self.tools]
tools_name_string = "\n".join(tools_names)
self.toolbot = LLM(
temperature=0,
system_message=f"""
You are an helpful assistant with tools. The tools you can choose from are:
{tools_name_string}
Your task is to choose one or multiple tools to answering a user's query.
DON'T come up with your own tools, only use the ones provided.
""",
# system_message='Use one of the provided tools to help the answering bot to answer the user. Do not answer directly. Use the "tool_calls" field in your answer.',
chat=False,
model="tools",
)
if len(tools_names) > 1 and "conversational_response_tool" in tools_names:
self.toolbot.system_message += "\n\nMake sure to only use the conversational response tool if the user is engaging in small talk. If the user is asking a question or looking for information, make sure to use one of the other tools!"
def get_chunks(
self,
@ -247,31 +416,78 @@ class Bot(BaseClass):
n_sources=4,
filter=True,
):
# Basic version without Streamlit calls
query = self.helperbot.generate(
get_generate_vector_query_prompt(user_input, self.chat.role)
).content.strip('"')
"""
Retrieves relevant text chunks from the vector database based on user input.
This method:
1. Generates a vector query based on user input using the helper bot
2. Searches multiple collections in the vector database
3. Combines results and sorts them by relevance
4. Limits results to the specified number of unique sources
5. Cleans the text by removing footnote references
6. Enriches the chunks with detailed metadata from ArangoDB
7. Groups chunks by article title
Parameters:
-----------
user_input : str
The user query to search for relevant documents
collections : list, optional
List of collection names to search in (default: ["sci_articles", "other_documents"])
n_results : int, optional
Maximum number of results to return (default: 7)
n_sources : int, optional
Maximum number of unique document sources to include (default: 4)
filter : bool, optional
Whether to filter results by ArangoDB IDs (default: True)
Returns:
--------
dict
A dictionary of grouped chunks where:
- Keys are article titles
- Values are dictionaries containing:
- 'article_number': A sequential number for the article
- 'chunks': A list of chunk dictionaries, each containing:
- 'document': The document text
- 'metadata': The document metadata
- 'distance': The similarity distance (lower is better)
- 'article_number': The sequential number of the article
"""
response = self.helperbot.generate(
get_generate_vector_query_prompt(user_input, self.chat.role),
format=QueryResponse.model_json_schema(),
)
print(response)
print_yellow("RESPONSE:", response.content)
query_response = QueryResponse.model_validate_json(response.content)
query = query_response.query_to_vector_database
print_purple(f"Query for vector DB:\n {query}")
combined_chunks = []
if collections:
for collection in collections:
where_filter = {"_id": {"$in": self.arango_ids}} if filter else {}
chunks = self.get_chromadb().query(
query=query,
collection=collection,
n_results=n_results,
n_sources=n_sources,
where=where_filter,
max_retries=3,
)
for doc, meta, dist in zip(
chunks["documents"][0],
chunks["metadatas"][0],
chunks["distances"][0],
):
combined_chunks.append(
{"document": doc, "metadata": meta, "distance": dist}
if filter:
where_filter = {"_id": {"$in": self.arango_ids}}
chunks = self.get_chromadb().query(
query=query,
collection=collection,
n_results=n_results,
n_sources=n_sources,
where=where_filter,
max_retries=3,
)
for doc, meta, dist in zip(
chunks["documents"][0],
chunks["metadatas"][0],
chunks["distances"][0],
):
combined_chunks.append(
{"document": doc, "metadata": meta, "distance": dist}
)
combined_chunks.sort(key=lambda x: x["distance"])
# Keep the best chunks according to n_sources
@ -288,7 +504,11 @@ class Bot(BaseClass):
remaining_chunks = [c for c in combined_chunks if c not in closest_chunks]
closest_chunks.extend(remaining_chunks[: n_results - len(closest_chunks)])
# Now fetch real metadata from Arango
# Remove footnoot references like [\d+] from the text chunks
for chunk in closest_chunks:
chunk["document"] = re.sub(r"\[\d+\]", "", chunk["document"])
# Fetch real metadata from Arango
for chunk in closest_chunks:
_id = chunk["metadata"].get("_id")
if not _id:
@ -317,9 +537,38 @@ class Bot(BaseClass):
}
article_number += 1
grouped_chunks[title]["chunks"].append(chunk)
return grouped_chunks
def answer_tool_call(self, response, user_input):
"""
Process tool calls returned by the AI and execute the corresponding functions.
This method evaluates tool calls in the AI response, executes the appropriate
functions with the provided arguments, and collects the resulting responses.
Parameters:
-----------
response : dict
The AI response containing potential tool_calls to be executed
user_input : str
The original user query that will be passed to tool functions
Returns:
--------
list
A list of string responses generated from executing the tool calls.
Returns an empty string if no tool calls are present.
Notes:
------
Supported tool functions include:
- fetch_other_documents_tool: Retrieves non-scientific documents
- fetch_science_articles_tool: Retrieves scientific articles
- fetch_science_articles_and_other_documents_tool: Retrieves both types of documents
- fetch_notes_tool: Retrieves user notes
- conversational_response_tool: Generates a conversational response
"""
bot_responses = []
# This method returns / stores responses (no Streamlit calls)
if not response.get("tool_calls"):
@ -331,28 +580,22 @@ class Bot(BaseClass):
arguments["query"] = user_input
if hasattr(self, function_name):
print_purple("Function name:", function_name)
if function_name in [
"fetch_other_documents_tool",
"fetch_science_articles_tool",
"fetch_science_articles_and_other_documents_tool",
]:
chunks = getattr(self, function_name)(**arguments)
bot_responses.append(
self.generate_from_chunks(user_input, chunks)
)
bot_responses.append(self.generate_from_chunks(user_input, chunks))
elif function_name == "fetch_notes_tool":
notes = getattr(self, function_name)()
bot_responses.append(
self.generate_from_notes(user_input, notes)
)
bot_responses.append(self.generate_from_notes(user_input, notes))
elif function_name == "conversational_response_tool":
response: OllamaMessage = getattr(self, function_name)(user_input)
print_green('Conversation response:', response)
bot_responses.append(
response.content.strip('"')
)
print_rainbow(i for i in bot_responses)
return "\n\n".join(i for i in bot_responses)
print_green("Conversation response:", response)
bot_responses.append(response.content.strip('"'))
return bot_responses
# def process_user_input(self, user_input, content_attachment=None):
# # Add user message
@ -382,6 +625,29 @@ class Bot(BaseClass):
# return bot_response
def generate_from_notes(self, user_input, notes):
"""
Generate a response based on user input and a collection of notes.
This method takes a user query and relevant notes, formats the notes into a string,
creates a prompt with the formatted notes and user input, and generates a streamed response.
Parameters
----------
user_input : str
The user's query or message to respond to
notes : list of dict
A list of note dictionaries, where each note has 'title' and 'content' keys
Returns
-------
generator
A generator that streams the AI-generated response
Notes
-----
This method does not make any Streamlit calls and is safe to use outside of the Streamlit context.
The notes are formatted with titles and content separated by horizontal rules.
"""
# No Streamlit calls
notes_string = ""
for note in notes:
@ -394,6 +660,47 @@ class Bot(BaseClass):
return self.chatbot.generate(prompt, stream=True)
def generate_from_chunks(self, user_input, chunks):
"""
Generate a response based on user input and retrieved document chunks.
This method formats the retrieved document chunks into a structured string,
combines it with the user's input in a prompt, and generates a streaming
response using the chatbot.
Parameters:
-----------
user_input : str
The user's query or message to respond to.
chunks : dict
A dictionary containing document chunks organized by title.
Expected structure:
{
"title1": {
"chunks": [
{
"document": "content...",
"metadata": {
"user_notes": "optional notes..."
}
},
...
],
"article_number": int
},
...
}
Returns:
--------
generator
A streaming generator of the chatbot's response.
Notes:
------
- This method does not make any Streamlit API calls.
- User notes are included in the formatted content if available.
- The formatted content includes titles, article numbers, and document text.
"""
# No Streamlit calls
chunks_string = ""
for title, group in chunks.items():
@ -419,7 +726,7 @@ class Bot(BaseClass):
)
return list(notes)
def fetch_science_articles_tool(self, query: str, n_documents: int):
def fetch_science_articles_tool(self, query: str, n_documents: int = 6):
"""
"Fetches information from scientific articles. Use this tool when the user is looking for information from scientific articles."
@ -441,7 +748,7 @@ class Bot(BaseClass):
query, collections=["sci_articles"], n_results=n_documents
)
def fetch_other_documents_tool(self, query: str, n_documents: int):
def fetch_other_documents_tool(self, query: str, n_documents: int = 6):
"""
Fetches information from other documents based on the user's query.
@ -507,10 +814,8 @@ class Bot(BaseClass):
"""
Generate a conversational response to a user's query.
This method is designed to provide a short and conversational response
without fetching additional data. It should be used ONLY when it is clear
that the user is engaging in small talk (like saying 'hi') and not seeking detailed information.
If the user is asking for informaiton or a qualified answer, don't use this tool!
This method is designed to provide a short and conversational response without fetching additional data.
It should be used ONLY when it is clear that the user is engaging in small talk (like saying 'hi').
Args:
query (str): The user's message to which the bot should respond.
@ -521,7 +826,6 @@ class Bot(BaseClass):
query = f"""
User message: "{query}".
Make your answer short and conversational.
This is perhaps not a conversation about a journalistic project, so try not to be too informative.
Don't answer with anything you're not sure of!
"""
@ -543,12 +847,20 @@ class StreamlitBot(Bot):
st.session_state["llm_chosen_backend"] = self.chatbot.chosen_backend
settings = self.get_settings()
print("SETTINGS:", settings)
if settings.get("use_reasoning_model", False):
self.chatbot.model = self.chatbot.get_model("reasoning")
print_rainbow(settings)
print('MODEL', self.chatbot.model)
def run(self):
# Example Streamlit run loop
title = (
self.project.name
if self.project
else self.collection.name if self.collection else None
)
self.chat.show_title(title=title)
self.chat.show_chat_history()
if user_input := st.chat_input("Write your message here...", accept_file=True):
text_input = user_input.text.replace('"""', "---")
@ -597,18 +909,24 @@ class StreamlitBot(Bot):
def process_user_input(self, user_input, content_attachment=None):
# We override to show messages in Streamlit instead of just storing
self.chat.add_message("user", user_input)
# Remove conversational response tool if there are more than 2 messages
if len(self.chat.chat_history) > 2 and len(self.tools) > 1:
for tool in self.tools:
if tool.__name__ == "conversational_response_tool":
self.tools.remove(tool)
break
if not content_attachment:
prompt = get_tools_prompt(user_input)
print_rainbow(self.toolbot.__dict__)
response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
if response.get("tool_calls"):
bot_response = self.answer_tool_call(response, user_input)
else:
bot_response = response.content.strip('"')
with st.chat_message(
"assistant", avatar=self.chat.get_avatar(role="assistant")
):
st.write(bot_response)
# with st.chat_message(
# "assistant", avatar=self.chat.get_avatar(role="assistant")
# ):
# st.write(bot_response)
else:
with st.chat_message(
"assistant", avatar=self.chat.get_avatar(role="assistant")
@ -644,67 +962,79 @@ class StreamlitBot(Bot):
self.chat.update_in_arango()
# def answer_tool_call(self, response, user_input): #! This should be in the Basse ChatBot?
# bot_responses = []
# for tool in response.get("tool_calls", []):
# function_name = tool.function.get("name")
# arguments = tool.function.arguments
# arguments["query"] = user_input
# print("Function name:", function_name)
# with st.chat_message(
# "assistant", avatar=self.chat.get_avatar(role="assistant")
# ):
# if function_name in [
# "fetch_other_documents_tool",
# "fetch_science_articles_tool",
# "fetch_science_articles_and_other_documents_tool",
# ]:
# chunks = getattr(self, function_name)(**arguments)
# response_text = self.generate_from_chunks(user_input, chunks)
# # Separate thinking chunk and normal chunk
# print_red("Model:", self.chatbot.model)
# if self.chatbot.model == "reasoning":
# bot_response = self.write_reasoning(response_text)
# else:
# bot_response = st.write_normal(response_text)
# bot_responses.append(bot_response)
# if chunks:
# sources = "###### Sources:\n"
# for title, group in chunks.items():
# j = group["chunks"][0]["metadata"].get(
# "journal", "No Journal"
# )
# d = group["chunks"][0]["metadata"].get(
# "published_date", "No Date"
# )
# sources += f"[{group['article_number']}] **{title}** :gray[*{j}* ({d})] \n"
# st.markdown(sources)
# bot_response += f"\n\n{sources}"
# bot_responses.append(bot_response)
# elif function_name == "fetch_notes_tool":
# notes = getattr(self, function_name)()
# response_text = self.generate_from_notes(user_input, notes)
# bot_responses.append(st.write_stream(response_text).strip('"'))
# elif function_name == "conversational_response_tool":
# response_text = getattr(self, function_name)(user_input)
# print(
# "###",
# self.chatbot.call_model,
# self.chatbot.get_model("reasoning"),
# )
# if self.chatbot.call_model == self.chatbot.get_model("reasoning"):
# print_blue("REASONING MODEL!")
# bot_response = self.write_reasoning(response_text).strip('"')
# else:
# bot_responses.append(st.write_stream(response_text))
# return "\n\n".join(bot_responses)
def answer_tool_call(
    self, response, user_input
):  #! This should be in the Base ChatBot?
    """Run the tool calls chosen by the tool bot and render each answer.

    Every tool call gets its own assistant chat message. Document tools
    stream an answer generated from the fetched chunks followed by a
    source list, the notes tool streams an answer generated from the
    notes, and the conversational tool writes a plain reply.

    Args:
        response: Tool bot response; its ``tool_calls`` entry is iterated.
        user_input: The user's message. It replaces the ``query`` argument
            of every tool call.

    Returns:
        str: All rendered answers joined by blank lines.
    """
    bot_responses = []
    # ``or []`` also covers a ``tool_calls`` key that is present but None.
    tools_response = response.get("tool_calls") or []
    for tool in tools_response:
        function_name = tool.function.get("name")
        if len(tools_response) > 1:
            # Don't use conversational response tool if there are other tools
            if function_name == "conversational_response_tool":
                continue
        arguments = tool.function.arguments
        arguments["query"] = user_input
        print("Function name:", function_name)

        with st.chat_message(
            "assistant", avatar=self.chat.get_avatar(role="assistant")
        ):
            if function_name in [
                "fetch_other_documents_tool",
                "fetch_science_articles_tool",
                "fetch_science_articles_and_other_documents_tool",
            ]:
                chunks = getattr(self, function_name)(**arguments)
                response_text = self.generate_from_chunks(user_input, chunks)
                # Separate thinking chunk and normal chunk
                print_red("Model:", self.chatbot.model)
                # NOTE(review): this compares against the literal "reasoning",
                # while the conversational branch below compares call_model
                # with get_model("reasoning") - confirm which one is intended.
                if self.chatbot.model == "reasoning":
                    bot_response = self.write_reasoning(response_text)
                else:
                    bot_response = self.write_normal(response_text)

                if chunks:
                    sources = "###### Sources:\n"
                    for title, group in chunks.items():
                        j = group["chunks"][0]["metadata"].get(
                            "journal", "No Journal"
                        )
                        d = group["chunks"][0]["metadata"].get(
                            "published_date", "No Date"
                        )
                        sources += f"[{group['article_number']}] **{title}** :gray[*{j}* ({d})] \n"
                    st.markdown(sources)
                    bot_response += f"\n\n{sources}"
                # Store the answer exactly once, with its sources when there
                # are any, so the returned text does not contain it twice.
                bot_responses.append(bot_response)

            elif function_name == "fetch_notes_tool":
                notes = getattr(self, function_name)()
                response_text = self.generate_from_notes(user_input, notes)
                bot_responses.append(st.write_stream(response_text).strip('"'))

            elif function_name == "conversational_response_tool":
                response_text = getattr(self, function_name)(user_input)
                print(
                    "###",
                    response_text,
                )
                if self.chatbot.call_model == self.chatbot.get_model("reasoning"):
                    print_blue("REASONING MODEL!")
                    bot_response = self.write_reasoning(response_text).strip('"')
                else:
                    if isinstance(response_text, OllamaMessage):
                        response_text = response_text.content
                    elif isinstance(response_text, dict):
                        response_text = response_text.get("content", "")
                    bot_response = self.write_normal(response_text).strip('"')
                # Without this the conversational answer was shown on screen
                # but missing from the returned text.
                bot_responses.append(bot_response)

    return "\n\n".join(bot_responses)
def write_reasoning(self, response_text):
chunks_iter = iter(response_text) # convert generator to iterator
@ -729,6 +1059,7 @@ class StreamlitBot(Bot):
return bot_response
else:
def full_gen():
if first_mode:
yield (first_mode, first_text)
@ -740,17 +1071,13 @@ class StreamlitBot(Bot):
def write_normal(self, response_text):
chunks_iter = iter(response_text) # convert generator to iterator
try:
first_mode, first_text = next(chunks_iter) # get first chunk
except StopIteration:
# no chunks at all
first_mode, first_text = None, None
def full_gen():
if first_mode:
yield (first_mode, first_text)
for mode, text in chunks_iter:
yield (mode, text)
for chunk in chunks_iter:
if isinstance(chunk, tuple) and len(chunk) == 2:
_, text = chunk
yield text
else:
yield chunk
bot_response = st.write_stream(full_gen()).strip('"')
return bot_response
@ -816,6 +1143,7 @@ class ResearchAssistantBot(StreamlitBot):
self.tools = [
self.fetch_science_articles_tool,
self.fetch_science_articles_and_other_documents_tool,
self.conversational_response_tool,
]
@ -954,7 +1282,7 @@ class HostBot(StreamlitBot):
Often "conversational_response_tool" is enough, but sometimes project notes are needed.
Make sure to read the description of the tools carefully!""",
chat=False,
model="tools"
model="tools",
)
def generate(self, query):
@ -986,7 +1314,7 @@ class GuestBot(StreamlitBot):
temperature=0,
system_message=f"You are an assistant to an expert on {subject}. Choose one or many tools to use in order to assist the expert in answering questions. Make sure to read the description of the tools carefully.",
chat=False,
model="tools"
model="tools",
)
def generate(self, query):

@ -1,37 +1,59 @@
import streamlit as st
from time import sleep
from colorprinter.print_color import *
from _classes import BotChatPage
def Projects():
    """
    Function to handle the Projects page.
    """
    from projects_page import ProjectsPage

    # Give the page its own slot in the session state (initialised once).
    if "Projects" not in st.session_state:
        st.session_state["Projects"] = {}
    projectpage = ProjectsPage(username=st.session_state["username"])
    projectpage.run()
def Bot_Chat():
    """
    Function to handle the Chat Bot page.
    """
    from chat_page import BotChatPage

    print_blue("Bot Chat")
    sleep(0.1)
    # Give the page its own slot in the session state (initialised once).
    if "Bot Chat" not in st.session_state:
        st.session_state["Bot Chat"] = {}
    chatpage = BotChatPage(username=st.session_state["username"])
    chatpage.run()
def Research():
    """
    Entry point for the Deep Research page.

    Makes sure the session state has a "Research" entry, then builds the
    page for the logged-in user and runs it.
    """
    from research_page import ResearchPage

    print_blue("Research")
    sleep(0.1)
    state = st.session_state
    if "Research" not in state:
        state["Research"] = {}
    page = ResearchPage(username=state["username"])
    page.run()
def Article_Collections():
    """
    Function to handle the Article Collections page.
    """
    from collections_page import ArticleCollectionsPage

    sleep(0.1)
    # Give the page its own slot in the session state (initialised once).
    if "Article Collections" not in st.session_state:
        st.session_state["Article Collections"] = {}
    article_collection = ArticleCollectionsPage(username=st.session_state["username"])
    article_collection.run()
@ -41,7 +63,8 @@ def Settings():
"""
Function to handle the Settings page.
"""
from _classes import SettingsPage
from settings_page import SettingsPage
settings = SettingsPage(username=st.session_state["username"])
sleep(0.1)
settings.run()
@ -51,10 +74,11 @@ def RSS_Feeds():
"""
Function to handle the RSS Feeds page.
"""
from _classes import RSSFeedsPage
if 'RSS Feeds' not in st.session_state:
st.session_state['RSS Feeds'] = {}
from feed_page import RSSFeedsPage
if "RSS Feeds" not in st.session_state:
st.session_state["RSS Feeds"] = {}
rss_feeds_page = RSSFeedsPage(username=st.session_state["username"])
sleep(0.1)
rss_feeds_page.run()
rss_feeds_page.run()

@ -0,0 +1,206 @@
from _llm import LLM
from _arango import ArangoDB
from _chromadb import ChromaDB
from streamlit_chatbot import Bot
from pydantic import BaseModel, Field
from typing import Dict, List, Tuple
from colorprinter.print_color import *
from projects_page import Project
from _base_class import StreamlitBaseClass
from prompts import get_tools_prompt
class ResearchBase(Bot):
    """Shared foundation for the research bots defined in this module.

    On top of ``Bot`` it gives every instance its own ``LLM``, ``ArangoDB``
    and ``ChromaDB`` objects plus an initially empty ``messages`` list.
    """

    def __init__(self, username, **kwargs):
        """Initialise the parent ``Bot`` and attach fresh helper objects.

        Args:
            username: Passed on to ``Bot.__init__`` as ``username``.
            **kwargs: Forwarded unchanged to ``Bot.__init__``.
        """
        super().__init__(username=username, **kwargs)
        self.llm = LLM()
        self.arango = ArangoDB()
        self.chromadb = ChromaDB()
        self.messages = []

    def start(self):
        """Reset ``messages`` to a single system entry and resolve the model.

        The system entry carries the LLM's current ``system_message``. When
        the LLM's ``model`` is one of the known aliases, ``get_model`` is
        called with it (its return value is not used here).
        """
        # NOTE(review): the entry uses the key "message"; many chat APIs
        # expect "content" -- confirm against whatever consumes self.messages.
        system_entry = {"role": "system", "message": self.llm.system_message}
        self.messages = [system_entry]

        known_aliases = ("small", "standard", "vision", "reasoning", "tools")
        if self.llm.model in known_aliases:
            self.llm.get_model(self.llm.model)
class ResearchManager(ResearchBase):
    """Bot that drafts a research plan for a journalist's question.

    Uses the "reasoning" model alias and a system message describing the
    assistant's role.
    """

    def __init__(self, username, project=None):
        """Set up the manager.

        Args:
            username: Passed on to ``ResearchBase.__init__``.
            project: Optional project object. If it exposes a
                ``notes_summary`` attribute, that summary is added to the
                planning prompt.
        """
        super().__init__(username=username, project=project)
        # Keep our own reference: generate_plan previously read a module-level
        # ``project`` global that only exists when the file runs as a script.
        self._research_project = project
        self.llm.system_message = "You are an assistant helping a journalist writing a report based on extensive research."
        self.llm.model = "reasoning"
        self.start()

    def generate_plan(self, question):
        """Ask the LLM for a step-by-step plan on how to answer ``question``.

        Args:
            question: The question the final report should answer.

        Returns:
            The ``content`` attribute of the LLM response.
        """
        query = f"""
A journalist wants to get a report that answers this question: "{question}"
THIS IS *NOT* A QUESTION YOU CAN ANSWER! Instead, you need to make a plan for how to answer this question.
Include what type of information you need from what available sources.
Available sources are:
- Scientific articles
- Other articles the journalists has gathered, such as blog posts, news articles, etc.
- The journalists own notes.
- Transcribed interviews (already done, you can't produce new ones).
All of the above sources are available in a database, but you need to specify what you need. Be as precise as possible.
As you don't have access to the sources, include steps to retrieve excerpts from articles and retrieve those that might be interesting.
Also include steps to verify the information.
Make the plan easy to follow and structured.
Remember: You are not answering the question, you are making *a plan* for how to answer the question using the available sources.
"""
        # Only mention the notes when a project with a summary is available;
        # the trailing newline keeps the summary from running into the
        # formatting instructions that follow.
        notes_summary = getattr(self._research_project, "notes_summary", None)
        if notes_summary:
            query += f"\nTo help you understand the subject, here is a summary of notes the journalist has done: {notes_summary}\n"
        query += """Please structure the plan like:
## Step 1:
- Task1: Description of task
- Task2: Description of task
## Step 2:
- Task1: Description of task
- Task2: Description of task
Etc, with as many steps and tasks as needed.
"""
        return self.llm.generate(query).content
class ResearchAssistant(ResearchBase):
    """Research bot whose LLM is given a plain research-assistant system message."""

    def __init__(self, username):
        """Build the base bot, set the system message and call ``start``.

        Args:
            username: Passed on to ``ResearchBase.__init__``.
        """
        super().__init__(username=username)
        system_message = "You are a Research Assistant"
        self.llm.system_message = system_message
        self.start()
class HelperBot(ResearchBase):
    """Bot that converts a free-text plan into structured data.

    Uses the "small" model alias and a system message that tells the model
    to keep the meaning and details of the text intact.
    """

    def __init__(self, username):
        """Set the system message and model alias, then call ``start``.

        Args:
            username: Passed on to ``ResearchBase.__init__``.
        """
        super().__init__(username)
        self.llm.system_message = "You are helping a researcher to structure a text. You will get a text and make it into structured data. Make sure not to change the meaning of the text and keeps all the details in the subtasks."
        self.llm.model = "small"
        self.start()

    def make_structured_plan(self, text, question=None):
        """Ask the LLM to restate ``text`` as a ``Plan`` and validate the reply.

        Args:
            text: The free-text plan to structure.
            question: Optional question the report is about; when given it is
                quoted in the prompt.

        Returns:
            A ``Plan`` instance whose ``steps`` maps a step title to a list of
            ``(task name, task description)`` tuples.

        Raises:
            pydantic.ValidationError: If the response content does not match
                the ``Plan`` schema.
        """

        class Plan(BaseModel):
            steps: Dict[str, List[Tuple[str, str]]] = Field(
                description="Structured plan represented as steps with their corresponding tasks or facts",
                # Passing ``example=`` directly to Field is a deprecated extra
                # keyword in pydantic v2; json_schema_extra puts the same
                # "example" key into the generated schema.
                json_schema_extra={
                    "example": {
                        "Step 1: Gather Existing Materials": [
                            ("Task 1", "Description of task"),
                            ("Task 2", "Description of task"),
                        ],
                        "Step 2: Extract Relevant Information": [
                            ("Task 1", "Description of task"),
                            ("Task 2", "Description of task"),
                        ],
                    }
                },
            )

        # The two prompt variants differ only in the optional ' on "<question>"' part.
        topic = f' on "{question}"' if question else ""
        query = f''' This is a proposed plan for how to write a report{topic}:\n"""{text}"""\nPlease make the plan into structured data with subtasks. Make sure to keep all the details in the subtasks.'''

        response = self.llm.generate(query, format=Plan.model_json_schema())
        print(response)
        structured_response = Plan.model_validate_json(response.content)
        print('PLAN')
        print_rainbow(structured_response)
        print()
        return structured_response
class ToolBot(ResearchBase):
    """Bot that proposes which of the supplied tools to use for a task."""

    def __init__(self, username, tools: list):
        """Configure a tool-selecting LLM and start the bot.

        Args:
            username: Passed on to ``ResearchBase.__init__``.
            tools: Tools the model may choose from; forwarded to the parent
                as ``tools``. Entries may be callables (their ``__name__`` is
                listed in the system message) or plain name strings.
        """
        super().__init__(username, tools=tools)
        # Strings have no __name__, so fall back to the value itself.
        tools_names = [getattr(tool, "__name__", str(tool)) for tool in self.tools]
        tools_name_string = "\n".join(tools_names)
        self.llm = LLM(
            temperature=0,
            system_message=f"""
You are an helpful assistant with tools. The tools you can choose from are:
{tools_name_string}
Your task is to choose one or multiple tools to answering a user's query.
DON'T come up with your own tools, only use the ones provided.
""",
            chat=False,
            model="tools",
        )
        # Start only after the final LLM is in place, as the sibling bots do;
        # otherwise self.messages records the system message of an LLM that
        # is thrown away.
        self.start()

    def propose_tools(self, task):
        """Ask the LLM which tool(s) to use for ``task``.

        Args:
            task: Description of the task, passed to ``get_tools_prompt``.

        Returns:
            The ``tool_calls`` attribute of the LLM response.
        """
        # A hand-written prompt used to be built here and was immediately
        # overwritten; only the shared prompt template is used.
        query = get_tools_prompt(task)
        response = self.llm.generate(query)
        print_yellow('Model:', self.llm.model)
        print_rainbow(response)
        return response.tool_calls
if __name__ == "__main__":
    # Manual smoke run: draft a plan, structure it, then ask for tools per task.
    base = StreamlitBaseClass(username="lasse")
    project = Project(
        username="lasse",
        project_name="Monarch butterflies",
        user_arango=base.get_arango(),
    )

    rm = ResearchManager(username="lasse", project=project)
    tb = ToolBot(
        username="lasse",
        tools=[
            "fetch_science_articles_tool",
            "fetch_notes_tool",
            "fetch_other_documents_tool",
            "fetch_science_articles_and_other_documents_tool",
        ],
    )
    # ra = ResearchAssistant(username="lasse")
    hb = HelperBot(username="lasse")

    question = "Tell me five interesting facts about the Monarch butterfly"

    # Generate plan
    plan = rm.generate_plan(question)

    # -- Example of what a plan can look like --
    # plan = """## Step-by-Step Plan for Answering the Question: "Tell Me Five Interesting Facts About the Monarch Butterfly"
    # ### Step 1: Gather and Organize Existing Materials
    # - **Task 1:** Retrieve all existing materials related to Monarch butterflies from the database using keywords such as "Monarch butterfly migration," "habitat loss," "milkweed," "insecticides," "climate change," "Monarch Butterfly Biosphere Reserve," and "migration patterns."
    # - **Task 2:** Categorize these materials into scientific articles, other articles (blogs, news), own notes, and transcribed interviews for easy access.
    # ### Step 2: Extract Relevant Excerpts
    # - **Task 1:** From the retrieved scientific articles, extract information on migration patterns, genetic studies, and population decline factors.
    # - **Task 2:** From blogs and news articles, look for interesting anecdotes or recent findings about conservation efforts and unique behaviors of Monarch butterflies.
    # ### Step 3: Identify Potential Interesting Facts
    # - **Task 1:** Review the extracted excerpts to identify potential facts such as migration patterns, threats faced by Monarchs, population decline statistics, conservation efforts, and unique behaviors.
    # - **Task 2:** Compile a list of five compelling and accurate facts based on the extracted information.
    # ### Step 4: Verify Information
    # - **Task 1:** Cross-check each fact with multiple sources to ensure accuracy. For example, verify migration details across scientific articles and recent news reports.
    # - **Task 2:** Look for consensus among sources regarding population trends and threats to Monarchs.
    # ### Step 5: Structure the Report
    # - **Task 1:** Organize the five selected facts into a coherent structure, ensuring each fact is clearly explained and engaging.
    # - **Task 2:** Incorporate quotes or statistics from sources to add depth and credibility to each fact.
    # ### Step 6: Review and Finalize
    # - **Task 1:** Proofread the report for clarity, accuracy, and grammar.
    # - **Task 2:** Ensure all information is presented in an engaging manner suitable for a journalistic report.
    # This plan ensures that the journalist systematically gathers, verifies, and presents five interesting facts about Monarch butterflies, providing a comprehensive and accurate report.
    # """
    # print_blue(plan)

    # Keep only the text after the model's "</think>" marker, when there is one.
    if "</think>" in plan:
        plan = plan.split("</think>")[1]

    # Make structured plan
    structured_plan = hb.make_structured_plan(plan, question)

    for step, tasks in structured_plan.steps.items():
        print_blue("\n### Step:", step)
        for task_name, task_description in tasks:
            print_blue("Task:", task_name)
            print_yellow(task_description)
            tools = tb.propose_tools(task_description)
            print_green("Tools:", tools)
            print('\n')
Loading…
Cancel
Save