Added Research etc

8 months ago · ba7eaaed2a
parent 83044a905b
commit ba7eaaed2a
19 changed files with 3189 additions and 584 deletions
--- a/_arango.py
+++ b/_arango.py
@ -37,6 +37,7 @@ class ArangoDB:
        self.db = self.client.db(db_name, username=user, password=password)


+
    def fix_key(self, _key):
        """
        Sanitize a given key by replacing all characters that are not alphanumeric, 
--- a/_base_class.py
+++ b/_base_class.py
@ -23,7 +23,6 @@ class BaseClass:
            return ArangoDB()
        else:
            from colorprinter.print_color import print_yellow
-            print_yellow(f"User: {self.username}")
            return ArangoDB(user=self.username, db_name=self.username)

    def get_article_collections(self) -> list:
@ -81,10 +80,46 @@ class BaseClass:


 class StreamlitBaseClass(BaseClass):
+    """
+    StreamlitBaseClass is a base class for Streamlit applications that provides methods for managing user settings, session state, and user interactions with collections and projects.
+    Methods:
+        __init__(username: str, **kwargs) -> None:
+            Initializes the StreamlitBaseClass with a username and additional keyword arguments.
+        get_settings(field: str = None):
+            Retrieves user settings from the database. If a specific field is provided, returns the value of that field. Otherwise, returns all settings.
+        update_settings(key, value) -> None:
+            Updates a specific setting in the database and the Streamlit session state.
+        update_session_state(page_name=None):
+            Updates the Streamlit session state with the attributes of the current instance. If a page name is provided, updates the session state for that page.
+        update_current_page(page_name):
+            Updates the current page in the Streamlit session state and the database.
+        choose_collection(text="Select a collection of favorite articles") -> str:
+            Displays a select box for choosing a collection of favorite articles. Updates the current collection in the session state and the database.
+        choose_project(text="Select a project") -> str:
+            Displays a select box for choosing a project. Updates the current project in the session state and the database.
+        """
    def __init__(self, username: str, **kwargs) -> None:
        super().__init__(username, **kwargs)
    
-    def get_settings(self):
+    def get_settings(self, field: str = None):
+        """
+        Retrieve or initialize user settings from the database.
+
+        This method fetches the user settings document from the "settings" collection
+        in the ArangoDB database. If the settings document does not exist, it initializes
+        it with default values for "current_collection" and "current_page". The settings
+        are then stored in the Streamlit session state.
+
+        Args:
+            field (str, optional): The specific field to retrieve from the settings. 
+                                   If not provided, the entire settings document is returned.
+
+        Returns:
+            dict or any: The entire settings document if no field is specified, 
+                         otherwise the value of the specified field.
+        """
        settings = self.user_arango.db.document("settings/settings")
        if not settings:
            self.user_arango.db.collection("settings").insert(
@ -94,9 +129,21 @@ class StreamlitBaseClass(BaseClass):
            if i not in settings:
                settings[i] = None
        st.session_state["settings"] = settings
+        if field:
+            return settings[field]
        return settings

    def update_settings(self, key, value) -> None:
+        """
+        Update a specific setting in the database and session state.
+
+        Args:
+            key (str): The key of the setting to update.
+            value (Any): The new value for the setting.
+
+        Returns:
+            None
+        """
        self.user_arango.db.collection("settings").update_match(
            filters={"_key": "settings"},
            body={key: value},
@ -104,11 +151,6 @@ class StreamlitBaseClass(BaseClass):
        )
        st.session_state["settings"][key] = value

-    def get_settings(self):
-        return self.user_arango.db.document("settings/settings")
-
-
-
    def update_session_state(self, page_name=None):
        """
        Updates the Streamlit session state with the attributes of the current instance.
@ -135,12 +177,36 @@ class StreamlitBaseClass(BaseClass):
        #         print(k.upper(), v)

    def update_current_page(self, page_name):
+        """
+        Updates the current page in the session state and settings.
+
+        Args:
+            page_name (str): The name of the page to set as the current page.
+
+        Side Effects:
+            Updates the "current_page" in the session state and settings if it is different from the current value.
+        """
        if st.session_state.get("current_page") != page_name:
            st.session_state["current_page"] = page_name
            self.update_settings("current_page", page_name)

    
    def choose_collection(self, text="Select a collection of favorite articles") -> str:
+        """
+        Prompts the user to select a collection of favorite articles from a list.
+
+        Args:
+            text (str): The prompt text to display for the selection box. Defaults to "Select a collection of favorite articles".
+
+        Returns:
+            str: The name of the selected collection.
+
+        Side Effects:
+            - Sets the `project` attribute to None.
+            - Sets the `collection` attribute to the selected collection.
+            - Updates the settings with the key "current_collection" to the selected collection.
+            - Updates the session state.
+        """
        collections = self.get_article_collections()
        collection = st.selectbox(text, collections, index=None)
        if collection:
@ -151,12 +217,28 @@ class StreamlitBaseClass(BaseClass):
            return collection
    
    def choose_project(self, text="Select a project") -> str:
-        projects = self.get_projects()
+        """
+        Prompts the user to select a project from a list of available projects.
+
+        Args:
+            text (str): The prompt text to display for project selection. Defaults to "Select a project".
+
+        Returns:
+            str: The name of the selected project.

-        project = st.selectbox(text, projects, index=None)
+        Side Effects:
+            - Updates the current project settings.
+            - Updates the session state.
+            - Prints the chosen project name to the console.
+        """
+        projects = self.get_projects()
+        print('projects', projects)
+        print(self.project_name)
+        
+        project = st.selectbox(text, projects, index=projects.index(self.project_name) if self.project_name in projects else None)
        print('Choosing project...')
        if project:
-            from _classes import Project
+            from projects_page import Project
            self.project = Project(self.username, project, self.user_arango)
            self.collection = None
            self.update_settings("current_project", self.project.name)
--- a/_chromadb.py
+++ b/_chromadb.py
@ -69,6 +69,8 @@ class ChromaDB:
            n += 1
            if n > max_retries:
                break
+            if where == {}:
+                where = None 
            r = col.query(
                query_texts=query,
                n_results=n_results - len(sources),
--- a/_classes.py
+++ b/_classes.py
@ -1,314 +0,0 @@
-
-import streamlit as st
-from time import sleep
-from datetime import datetime, timedelta
-from colorprinter.print_color import *
-
-
-from _base_class import StreamlitBaseClass
-from _rss import RSSReader
-from projects_page import Project
-from streamlit_chatbot import StreamlitChat
-
-
-class BotChatPage(StreamlitBaseClass):
-    def __init__(self, username):
-        super().__init__(username=username)
-        self.collection_name = None
-        self.project_name = None
-        self.project: Project = None
-        self.chat = None
-        self.role = "Research Assistant"  # Default persona
-        self.page_name = "Bot Chat"
-        self.chat_key = None
-
-        # Initialize attributes from session state if available
-        if self.page_name in st.session_state:
-            for k, v in st.session_state[self.page_name].items():
-                setattr(self, k, v)
-
-    def run(self):
-        from streamlit_chatbot import EditorBot, ResearchAssistantBot, PodBot, StreamlitBot
-        bot = None
-        self.update_current_page("Bot Chat")
-        self.remove_old_unsaved_chats()
-        self.sidebar_actions()
-
-        if self.collection_name or self.project:
-            print_purple("Collection:", self.collection_name, "Project:", self.project_name)
-            # If no chat exists, create a new Chat instance
-            self.chat = self.get_chat(role=self.role)
-
-            # Create a Bot instance with the Chat object
-            if self.role == "Research Assistant":
-                print_blue("Creating Research Assistant Bot")
-                bot = ResearchAssistantBot(
-                    username=self.username,
-                    chat=self.chat,
-                    collection=self.collection_name,
-                    project=self.project,
-                )
-
-            elif self.role == "Editor":
-                bot = EditorBot(
-                    username=self.username,
-                    chat=self.chat,
-                    collection=self.collection,
-                    project=self.project,
-                )
-            elif self.role == "Podcast":
-                st.session_state["make_podcast"] = True
-                # with st.sidebar:
-                with st.sidebar:
-                    with st.form("make_podcast_form"):
-                        instructions = st.text_area(
-                            "What should the podcast be about? Give a brief description, as if you were the producer."
-                        )
-                        start = st.form_submit_button("Make Podcast!")
-                        if start:
-                            bot = PodBot(
-                                subject=self.project.name,
-                                username=self.username,
-                                chat=self.chat,
-                                collection=self.collection,
-                                project=self.project,
-                                instructions=instructions,
-                            )
-
-            # Run the bot (this will display chat history and process user input)
-            if bot:
-                bot.run()
-
-            # Save updated chat state to session state
-            st.session_state[self.page_name] = {
-                "collection": self.collection,
-                "project": self.project,
-                "chat": self.chat,
-                "role": self.role,
-            }
-        else: # If no collection or project is selected, use the conversational response bot           
-            print_yellow("No collection or project selected. Using conversational response bot.")
-            bot = StreamlitBot(
-                username=self.username,
-                chat=self.get_chat(),
-                tools=["conversational_response_tool"],
-            )
-            bot.run()
-
-
-    def get_chat(self, role="Research Assistant"):
-        print_blue('CHAT TYPE:', role)
-        if 'chat_key' not in st.session_state:
-            chat = StreamlitChat(username=self.username, role=role)
-            st.session_state['chat_key'] = chat._key
-            print_blue("Creating new chat:", st.session_state['chat_key'])
-        else:
-            print_blue("Old chat:", st.session_state['chat_key'])
-            chat_data = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
-            chat = StreamlitChat.from_dict(chat_data)
-        return chat
-
-    def sidebar_actions(self):
-        with st.sidebar:
-            self.collection = self.choose_collection(
-                "Article collection to use for chat:"
-            )
-            self.project = self.choose_project("Project to use for chat:")
-
-            if self.collection or self.project:
-                st.write("---")
-                if self.project:
-                    self.role = st.selectbox(
-                        "Choose Bot Role",
-                        options=["Research Assistant", "Editor", "Podcast"],
-                        index=0,
-                    )
-                elif self.collection:
-                    self.role = "Research Assistant"
-
-                # Load existing chats from the database
-                if self.project:
-                    chat_history = list(
-                        self.user_arango.db.aql.execute(
-                            f'FOR doc IN chats FILTER doc["project"] == "{self.project}" RETURN {{"_key": doc["_key"], "name": doc["name"]}}'
-                        )
-                    )
-                    # self.project = Project(username=self.username, project_name=self.project_name, user_arango=self.user_arango)
-                elif self.collection:
-                    chat_history = list(
-                        self.user_arango.db.aql.execute(
-                            f'FOR doc IN chats FILTER doc["collection"] == "{self.collection}" RETURN {{"_key": doc["_key"], "name": doc["name"]}}'
-                        )
-                    )
-
-                chats = {i["name"]: i["_key"] for i in chat_history}
-                selected_chat = st.selectbox(
-                    "Continue another chat", options=[""] + list(chats.keys()), index=0
-                )
-                if selected_chat:
-                    st.session_state["chat_key"] = chats[selected_chat]
-                    self.chat = self.get_chat()
-            
-            if not self.role:
-                self.role == "Research Assistant"
-
-    def remove_old_unsaved_chats(self):
-        two_weeks_ago = datetime.now() - timedelta(weeks=2)
-        q = f'FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < "{two_weeks_ago.isoformat()}" RETURN doc'
-        print_blue(q)
-        old_chats = self.user_arango.db.aql.execute(
-            f'FOR doc IN chats RETURN doc'
-        )
-        print('test', old_chats)
-        old_chats = self.user_arango.db.aql.execute(
-            f'FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < "{two_weeks_ago.isoformat()}" RETURN doc'
-        )
-        for chat in old_chats:
-            print_red(chat["_id"])
-            self.user_arango.db.collection("chats").delete(chat["_key"])
-
-
-class SettingsPage(StreamlitBaseClass):
-    def __init__(self, username: str):
-        super().__init__(username=username)
-
-    def run(self):
-        self.update_current_page("Settings")
-        self.set_profile_picture()
-        self.use_reasoning_model()
-
-    def set_profile_picture(self):
-        st.markdown("Profile picture")
-        profile_picture = st.file_uploader(
-            "Upload profile picture", type=["png", "jpg", "jpeg"]
-        )
-        if profile_picture:
-            # Resize the image to 64x64 pixels
-            from PIL import Image
-
-            img = Image.open(profile_picture)
-            img.thumbnail((64, 64))
-            img_path = f"user_data/{st.session_state['username']}/profile_picture.png"
-            img.save(img_path)
-            self.update_settings("avatar", img_path)
-            st.success("Profile picture uploaded")
-            sleep(1)
-
-    def use_reasoning_model(self):
-        """
-        Displays a checkbox in the Streamlit interface to enable or disable the reasoning model for generating responses in chats.
-        
-        Retrieves the current settings and checks if the "use_reasoning_model" key exists. If not, it initializes it to False.
-        Then, it displays a markdown text and a checkbox for the user to toggle the reasoning model usage.
-        The updated setting is saved back to the settings.
-
-        Returns:
-            None
-        """
-        settings = self.get_settings()
-        if "use_reasoning_model" not in settings:
-            settings["use_reasoning_model"] = False
-        st.markdown("Use Reasoning Model")
-
-        use_reasoning_model = st.checkbox("Use Reasoning Model", value=settings["use_reasoning_model"], help="Use the reasoning model to generate responses in chats. This may take longer to process.")
-        self.update_settings("use_reasoning_model", use_reasoning_model)
-
-
-class RSSFeedsPage(StreamlitBaseClass):
-    def __init__(self, username: str):
-        super().__init__(username=username)
-        self.page_name = "RSS Feeds"
-        self.reader = RSSReader(username=username)
-        # Initialize attributes from session state if available
-        for k, v in st.session_state.get(self.page_name, {}).items():
-            setattr(self, k, v)
-
-    def run(self):
-        if "selected_feed" not in st.session_state:
-            st.session_state["selected_feed"] = None
-        self.update_current_page(self.page_name)
-        self.display_feed()
-        self.sidebar_actions()
-        self.update_session_state(page_name=self.page_name)
-
-    def select_rss_feeds(self):
-        rss_feeds = self.reader.get_rss_feeds()
-        if rss_feeds:
-            feed_options = [feed["title"] for feed in rss_feeds]
-            with st.sidebar:
-                st.subheader("Show your feeds")
-                selected_feed_title = st.selectbox(
-                    "Select a feed", options=feed_options, index=None
-                )
-                if selected_feed_title:
-                    st.session_state["selected_feed"] = [
-                        feed["_key"]
-                        for feed in rss_feeds
-                        if feed["title"] == selected_feed_title
-                    ][0]
-                    st.rerun()
-        else:
-            st.write("You have no RSS feeds added.")
-
-    def search_feeds(self, rss_url):
-        with st.spinner("Discovering feeds..."):
-            feeds = self.reader.discover_feeds(rss_url)
-            if feeds:
-                st.session_state["discovered_feeds"] = feeds
-            else:
-                st.error("No RSS feeds found at the provided URL.")
-
-    def sidebar_actions(self):
-        if "discovered_feeds" not in st.session_state:
-            st.session_state["discovered_feeds"] = None
-
-        with st.sidebar:
-            self.select_rss_feeds()
-            st.subheader("Add a New RSS Feed")
-            with st.form("add_rss_feed"):
-                rss_url = st.text_input("Website URL or RSS Feed URL")
-                submitted = st.form_submit_button("Discover Feeds")
-                if submitted:
-                    print_green(rss_url)
-                    feeds = self.reader.discover_feeds(rss_url)
-                    st.session_state["discovered_feeds"] = feeds
-
-            if st.session_state["discovered_feeds"]:
-                st.subheader("Select a Feed to Add")
-                feeds = st.session_state["discovered_feeds"]
-                feed_options = [f"{feed['title']} ({feed['href']})" for feed in feeds]
-                selected_feed = st.selectbox("Available Feeds", options=feed_options)
-                selected_feed_url = feeds[feed_options.index(selected_feed)]["href"]
-
-                if st.button("Preview Feed"):
-                    feed = self.reader.parse_feed(selected_feed_url)
-                    st.write(f"{feed.title}")
-                    description = self.reader.html_to_markdown(feed.description)
-                    st.write(f"_{description}_")
-                    for entry in feed.entries[:5]:
-                        with st.expander(entry["title"]):
-                            summary = entry.get("summary", "No summary available")
-                            markdown_summary = self.reader.html_to_markdown(summary)
-                            st.markdown(markdown_summary)
-                    print_yellow(selected_feed_url)
-
-                if st.button(
-                    "Add RSS Feed",
-                    on_click=self.reader.add_rss_feed,
-                    args=[selected_feed_url],
-                ):
-                    del st.session_state["discovered_feeds"]
-                    st.success("RSS Feed added.")
-                    st.rerun()
-
-    def display_feed(self):
-        if st.session_state["selected_feed"]:
-            self.reader.get_feed(st.session_state["selected_feed"])
-            st.title(self.reader.feed.title)
-            st.write(f"_{self.reader.feed.description}_")
-            for entry in self.reader.feed.entries[:5]:
-                with st.expander(entry["title"]):
-                    summary = entry.get("summary", "No summary available")
-                    markdown_summary = self.reader.html_to_markdown(summary)
-                    st.markdown(markdown_summary)
-                    st.markdown(f"[Read more]({entry['link']})")
--- a/_llm.py
+++ b/_llm.py
@ -35,6 +35,7 @@ class LLM:
        chosen_backend (str): The chosen backend server for the API.
        client (Client): The client for synchronous API calls.
        async_client (AsyncClient): The client for asynchronous API calls.
+        tools (list): List of tools to be used in generating the response.

    Methods:
        __init__(self, system_message, temperature, model, max_length_answer, messages, chat, chosen_backend):
@ -75,6 +76,7 @@ class LLM:
        messages: list[dict] = None,
        chat: bool = True,
        chosen_backend: str = None,
+        tools: list = None,
    ) -> None:
        """
        Initialize the assistant with the given parameters.
@ -112,23 +114,28 @@ class LLM:
            "X-Chosen-Backend": self.chosen_backend,
        }
        self.host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/")
-        self.client: Client = Client(host=self.host_url, headers=headers)           
+        self.host_url = 'http://192.168.1.12:3300' #! Change back when possible
+        self.client: Client = Client(host=self.host_url, headers=headers, timeout=120)           
        self.async_client: AsyncClient = AsyncClient()

    def get_credentials(self):
        # Initialize the client with the host and default headers
        credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}"
        return base64.b64encode(credentials.encode()).decode()
+    
    def get_model(self, model_alias):
+
        models = {
            "standard": "LLM_MODEL",
            "small": "LLM_MODEL_SMALL",
            "vision": "LLM_MODEL_VISION",
-            "standard_64k": "LLM_MODEL_64K",
+            "standard_64k": "LLM_MODEL_LARGE",
            "reasoning": "LLM_MODEL_REASONING",
            "tools": "LLM_MODEL_TOOLS",
        }
-        return os.getenv(models.get(model_alias, "LLM_MODEL"))
+        model = os.getenv(models.get(model_alias, "LLM_MODEL"))
+        self.model = model
+        return model

    def count_tokens(self):
        num_tokens = 0
@ -165,6 +172,7 @@ class LLM:
        ] = None,
        temperature: float = None,
        messages: list[dict] = None,
+        format = None
    ):
        """
        Generate a response based on the provided query and context.
@ -178,6 +186,7 @@ class LLM:
        model (Optional[Literal["small", "standard", "vision", "tools"]]): The model type to be used.
        temperature (float): The temperature setting for the model.
        messages (list[dict]): List of previous messages in the conversation.
+        format (Optional[BaseModel]): The format of the response.

        Returns:
        str: The generated response or an error message if an exception occurs.
@ -218,29 +227,31 @@ class LLM:
            headers["X-Model-Type"] = "small"
        if model == self.get_model("tools"):
            headers["X-Model-Type"] = "tools"
+        elif model == self.get_model("reasoning"):
+            headers["X-Model-Type"] = "reasoning"

        # Prepare options
        options = Options(**self.options)
        options.temperature = temperature

-        # Adjust the options for long messages
-        if self.chat or len(self.messages) > 15000 and model != self.get_model("tools"):
-            num_tokens = self.count_tokens() + self.max_length_answer // 2
-            if num_tokens > 8000:
-                model = self.get_model("standard_64k")
-                print_purple("Switching to large model")
-                headers["X-Model-Type"] = "large"
+        # TODO: The automatic switch to the large model (commented out below) is disabled as a hack to get the reasoning model to work. It should be handled better.
+        # # Adjust the options for long messages
+        # if self.chat or len(self.messages) > 15000 and model != self.get_model("tools"):
+        #     num_tokens = self.count_tokens()
+        #     if num_tokens > 8000:
+        #         model = self.get_model("standard_64k")
+        #         print_purple("Switching to large model")
+        #         headers["X-Model-Type"] = "large"

        # Call the client.chat method
        try:
-            print('###########')
            self.call_model = model
-            print()
-            print('Headers:', headers)
-            print_yellow('Model:', model)
-            print()
-            headers['X-Chosen-Backend'] = 'backend_tools_server'
-            self.client: Client = Client(host=self.host_url, headers=headers)
+            self.client: Client = Client(host=self.host_url, headers=headers, timeout=300) #!
+            #print_rainbow(self.client._client.__dict__)
+            print_yellow("Model used in call:", model)
+            # if headers:
+            #     self.client.headers.update(headers)
+        
            response = self.client.chat(
                model=model,
                messages=self.messages,
@ -248,6 +259,7 @@ class LLM:
                stream=stream,
                options=options,
                keep_alive=3600 * 24 * 7,
+                format=format
            )

        except ResponseError as e:
@ -279,6 +291,8 @@ class LLM:
            # Process the response
            if isinstance(response, ChatResponse):
                result = response.message.content.strip('"')
+                if '</think>' in result:
+                    result = result.split('</think>')[-1]
                self.messages.append(
                    {"role": "assistant", "content": result.strip('"')}
                )
@ -446,10 +460,6 @@ class LLM:
                headers["X-Model-Type"] = "large"

        # Call the async client's chat method
-        print()
-        print_rainbow(self.async_client.__dict__)
-        print(model, headers, )
-        print()
        try:
            response = await self.async_client.chat(
                model=model,
@ -479,6 +489,7 @@ class LLM:
                    self.messages[0]["content"] += system_message_info
            self.messages[-1] = {"role": "user", "content": user_input}

+        print_red(self.async_client.last_response.headers.get("X-Chosen-Backend", "No backend"))
        # Update chosen_backend
        if model not in [self.get_model("vision"), self.get_model("tools"), self.get_model("reasoning")]:
            self.chosen_backend = self.async_client.last_response.headers.get(
--- a/agent_research.py
+++ b/agent_research.py
--- a/article2db.py
+++ b/article2db.py
@ -29,15 +29,15 @@ class Document:
    def __init__(
        self,
        pdf_file=None,
-        filename: str=None,
-        doi: str=None,
-        username: str=None,
-        is_sci: bool=None,
-        is_image: bool=False,
-        text: str=None,
-        _key: str=None,
-        arango_db_name: str=None,
-        arango_collection: str=None,
+        filename: str = None,
+        doi: str = None,
+        username: str = None,
+        is_sci: bool = None,
+        is_image: bool = False,
+        text: str = None,
+        _key: str = None,
+        arango_db_name: str = None,
+        arango_collection: str = None,
    ):
        self.filename = filename
        self.pdf_file = pdf_file
@ -63,11 +63,10 @@ class Document:
        if self.pdf_file:
            self.open_pdf(self.pdf_file)

-
    def make_summary_in_background(self):
        if not self._id and all([self.arango_collection, self._key]):
            self._id = f"{self.arango_collection}/{self._key}"
-        
+
        if not self._id:
            return
        data = {
@ -281,8 +280,19 @@ class Processor:
        self.arango_collection = arango_collection
        return arango_collection

-
    def extract_doi(self, text, multi=False):
+        """
+        Extracts the DOI (Digital Object Identifier) from the given text.
+
+        Args:
+            text (str): The text from which to extract the DOI.
+            multi (bool, optional): If True, extract multiple DOIs from the text. Defaults to False.
+
+        Returns:
+            str or list or None: 
+                - If multi is False, returns the extracted DOI as a string if found, otherwise None.
+                - If multi is True, returns a list of extracted DOIs if found, otherwise None.
+        """
        doi_pattern = r"10\.\d{4,9}/[-._;()/:A-Za-z0-9]+"

        if multi:
@ -297,7 +307,7 @@ class Processor:
                if self.get_crossref(doi):
                    self.document.metadata = self.get_crossref(doi)
                    self.document.doi = doi
-                else:
+                elif self.document.pdf:
                    for page in self.document.pdf.pages(0, 6):
                        text = page.get_text()
                        if re.search(doi_pattern, text):
@ -316,18 +326,20 @@ class Processor:
                            I want you to find the DOI of the article. Ansewer ONLY with the DOI, nothing else. 
                            If you can't find the DOI, answer "not_found".
                            '''
-                            st.write('Trying to extract DOI from text using LLM...')
-                            doi = llm.generate(prompt).replace('https://doi.org/', '')
+                            st.write("Trying to extract DOI from text using LLM...")
+                            doi = llm.generate(prompt).replace("https://doi.org/", "")
                            if doi == "not_found":
                                return None
                            else:
                                doi = re.search(doi_pattern, doi).group()
                            break
+                else:
+                    print_yellow(f"DOI not extracted: {doi}")

                return doi
            else:
                return None
-            
+
    def chunks2chroma(self, _id, key):
        st.write("Adding to vector database...")
        assert self.document.text, "Document must have 'text' attribute."
@ -442,7 +454,11 @@ class Processor:
                    )
                    arango_document["metadata"] = self.document.metadata
                    arango_document["summary"] = {
-                        "text_sum": self.document.metadata["abstract"],
+                        "text_sum": (
+                            self.document.metadata["abstract"]["text_sum"]
+                            if "text_sum" in self.document.metadata["abstract"]
+                            else self.document.metadata["abstract"]
+                        ),
                        "meta": {"model": "from_metadata"},
                    }

@ -608,7 +624,6 @@ class Processor:
            if not self.document.is_sci:
                self.document.is_sci = bool(self.document.metadata)

-
        arango_collection = self.get_arango()

        doc = arango_collection.get(self.document._key) if self.document.doi else None
@ -624,21 +639,22 @@ class Processor:
                    "title": self.document.get_title(only_meta=True)
                }

-            elif 'title' not in doc['metadata']:
-                self.document.doc["metadata"]["title"] = self.document.get_title(only_meta=True)
-
+            elif "title" not in doc["metadata"]:
+                self.document.doc["metadata"]["title"] = self.document.get_title(
+                    only_meta=True
+                )

-            if "user_access" not in doc or doc['user_access'] == None:
+            if "user_access" not in doc or doc["user_access"] == None:
                self.document.doc["user_access"] = [self.document.username]
            else:
-                if self.document.username not in doc['user_access']:
+                if self.document.username not in doc["user_access"]:
                    self.document.doc["user_access"] = doc.get("user_access", []) + [
                        self.document.username
                    ]
            self.metadata = self.document.doc["metadata"]
            arango_collection.update(self.document.doc)
            return doc["_id"], arango_collection.db_name, self.document.doi
-        
+
        else:
            self.document.doc = (
                {"doi": self.document.doi, "_key": fix_key(self.document.doi)}
@ -665,6 +681,8 @@ class Processor:
                        only_meta=True
                    )
            if "_key" not in self.document.doc:
+                if not self.document.metadata:
+                    self.document.metadata = {}

                if self.document.doi:
                    _key = self.document.doi
@ -672,7 +690,10 @@ class Processor:
                    _key = self.document.title
                elif self.document.get_title():
                    _key = self.document.get_title()
-                elif 'title' in self.document.doc["metadata"] and self.document.doc["metadata"]["title"]:
+                elif (
+                    "title" in self.document.doc["metadata"]
+                    and self.document.doc["metadata"]["title"]
+                ):
                    _key = self.document.doc["metadata"]["title"]
                else:
                    _key = self.document.pdf_file.name
--- a/chat_page.py
+++ b/chat_page.py
@ -0,0 +1,236 @@
+
+import streamlit as st
+from datetime import datetime, timedelta
+from colorprinter.print_color import *
+
+
+from _base_class import StreamlitBaseClass
+from _rss import RSSReader
+from projects_page import Project
+from streamlit_chatbot import StreamlitChat, StreamlitBot
+
+
+class BotChatPage(StreamlitBaseClass):
+    """
+    BotChatPage - A Streamlit interface for chatting with various AI assistants.
+    This class provides a user interface for interacting with different types of AI bots
+    (Research Assistant, Editor, Podcast) that can access and work with user's collections
+    and projects.
+    Attributes:
+        username (str): The username of the current user.
+        collection_name (str): Name of the selected collection.
+        project_name (str): Name of the selected project.
+        project (Project): Project instance the chat is associated with.
+        chat (StreamlitChat): Chat instance for maintaining conversation history.
+        role (str): The selected bot persona, default is "Research Assistant".
+        page_name (str): Name of the current page ("Bot Chat").
+        chat_key (str): Unique identifier for the current chat session.
+        bot (StreamlitBot): Instance of the selected bot type.
+    Methods:
+        run(): Main method to render the chat interface and handle interactions.
+        get_chat(role, new_chat): Retrieves existing chat or creates a new one.
+        sidebar_actions(): Renders sidebar elements for selecting collections, projects, and chat options.
+        remove_old_unsaved_chats(): Cleans up unsaved chats older than two weeks.
+    """
+    def __init__(self, username):
+        """
+        Set default chat-page state, then restore any state saved for this page.
+
+        Args:
+            username (str): Name of the current user, forwarded to the base class.
+        """
+        super().__init__(username=username)
+        self.collection_name = None
+        # `run()` and `sidebar_actions()` read/write `self.collection`, and that is
+        # also the key stored in session state, so make sure it always exists.
+        self.collection = None
+        self.project_name = None
+        self.project: Project = None
+        self.chat = None
+        self.role = "Research Assistant"  # Default persona
+        self.page_name = "Bot Chat"
+        self.chat_key = None
+        self.bot: StreamlitBot = None
+
+        # Values saved under this page's name by an earlier run override the defaults.
+        for k, v in st.session_state.get(self.page_name, {}).items():
+            setattr(self, k, v)
+
+    def run(self):
+        """
+        Render the chat page and run the bot matching the current selection.
+
+        When a collection or project is selected, a bot is created according to
+        ``self.role`` and the page state is stored in the Streamlit session state.
+        Without any selection, a plain conversational bot is used.
+        """
+        from streamlit_chatbot import EditorBot, ResearchAssistantBot, PodBot, StreamlitBot
+        self.bot: StreamlitBot = None
+        self.update_current_page("Bot Chat")
+        self.remove_old_unsaved_chats()
+        self.sidebar_actions()
+
+        # sidebar_actions() stores the selection in `self.collection`;
+        # `self.collection_name` is never populated, so testing it here sent
+        # collection-only sessions to the fallback bot below.
+        if self.collection or self.project:
+            print_purple("Collection:", self.collection, "Project:", self.project)
+            # If no chat exists, create a new Chat instance
+            self.chat = self.get_chat(role=self.role)
+
+            # Create a Bot instance with the Chat object
+            if self.role == "Research Assistant":
+                print_blue("Creating Research Assistant Bot")
+                self.bot: ResearchAssistantBot = ResearchAssistantBot(
+                    username=self.username,
+                    chat=self.chat,
+                    collection=self.collection,
+                    project=self.project,
+                    tools=[
+                    "fetch_other_documents_tool",
+                    "fetch_science_articles_tool",
+                    "fetch_science_articles_and_other_documents_tool",
+                    "conversational_response_tool"]
+                )
+
+            elif self.role == "Editor":
+                self.bot: StreamlitBot = EditorBot(
+                    username=self.username,
+                    chat=self.chat,
+                    collection=self.collection,
+                    project=self.project,
+                    tools=[
+                    "fetch_other_documents_tool",
+                    "fetch_notes_tool",
+                    "conversational_response_tool"]
+                )
+
+            elif self.role == "Podcast":
+                st.session_state["make_podcast"] = True
+                with st.sidebar:
+                    with st.form("make_podcast_form"):
+                        instructions = st.text_area(
+                            "What should the podcast be about? Give a brief description, as if you were the producer."
+                        )
+                        start = st.form_submit_button("Make Podcast!")
+                        if start:
+                            # NOTE(review): the PodBot is kept in a local only, so the
+                            # `self.bot.run()` call below never runs it — presumably
+                            # PodBot does its work on construction; confirm.
+                            bot = PodBot(
+                                subject=self.project.name,
+                                username=self.username,
+                                chat=self.chat,
+                                collection=self.collection,
+                                project=self.project,
+                                instructions=instructions
+                            )
+
+            # Save updated chat state to session state
+            st.session_state[self.page_name] = {
+                "collection": self.collection,
+                "project": self.project,
+                "chat": self.chat,
+                "role": self.role,
+            }
+
+            # Run the bot (this will display chat history and process user input)
+            if self.bot:
+                self.bot.run()
+
+        else: # If no collection or project is selected, use the conversational response bot
+            print_yellow("No collection or project selected. Using conversational response bot.")
+            self.bot: StreamlitBot = StreamlitBot(
+                username=self.username,
+                chat=self.get_chat(),
+                tools=["conversational_response_tool"],
+            )
+            self.bot.run()
+
+
+    def get_chat(self, role="Research Assistant", new_chat=False):
+        """
+        Retrieves or creates a chat session.
+
+        Parameters:
+        -----------
+        role : str, optional
+            The role assigned to a newly created chat (default is "Research Assistant").
+        new_chat : bool, optional
+            If True, creates a new chat regardless of existing sessions (default is False).
+
+        Returns:
+        --------
+        StreamlitChat
+            A chat instance either newly created or retrieved from the database.
+
+        Notes:
+        ------
+        - If new_chat is True, a new chat is always created
+        - If no chat_key exists in session state, a new chat is created
+        - Otherwise the chat stored under chat_key is loaded from the "chats" collection
+        - If that chat no longer exists in the database, a new chat is created
+          and chat_key is replaced
+        """
+        print_blue('CHAT TYPE:', role)
+        if not new_chat and 'chat_key' in st.session_state:
+            print_blue("Old chat:", st.session_state['chat_key'])
+            chat_data = self.user_arango.db.collection("chats").get(st.session_state['chat_key'])
+            if chat_data is not None:
+                return StreamlitChat.from_dict(chat_data)
+            # The stored key points at a chat that has been removed (for example by
+            # remove_old_unsaved_chats); fall through and start a fresh chat.
+            print_yellow("Chat not found in database, creating a new one.")
+
+        chat = StreamlitChat(username=self.username, role=role)
+        st.session_state['chat_key'] = chat._key
+        print_blue("Creating new chat:", st.session_state['chat_key'])
+        return chat
+
+    def sidebar_actions(self):
+        """
+        Render the sidebar forms for choosing collection/project, bot role and chat.
+
+        Sets ``self.collection``, ``self.project`` and ``self.role`` from the widgets.
+        When "Start Chat" is pressed, either continues the selected existing chat or
+        creates a new one, then reruns the script.
+        """
+        with st.sidebar:
+            with st.form("select_chat"):
+                self.collection = self.choose_collection("Article collection to use for chat:")
+                self.project = self.choose_project("Project to use for chat:")
+                submitted = st.form_submit_button("Select Collection/Project")
+
+            with st.form("chat_settings"):
+                if submitted or any([self.collection, self.project]):
+                    if self.project:
+                        self.role = st.selectbox(
+                            "Choose Bot Role",
+                            options=["Research Assistant", "Editor", "Podcast"],
+                            index=0,
+                        )
+                    elif self.collection:
+                        self.role = "Research Assistant"
+
+                    # Load existing chats from the database. A project takes
+                    # precedence over a collection; with neither selected (form
+                    # submitted empty) there is nothing to list.
+                    chat_history = []
+                    if self.project:
+                        filter_field, filter_value = "project", self.project
+                    elif self.collection:
+                        filter_field, filter_value = "collection", self.collection
+                    else:
+                        filter_field, filter_value = None, None
+
+                    if filter_field:
+                        # Bind variables keep user-chosen names out of the query text.
+                        # str() mirrors what the previous f-string interpolation did.
+                        chat_history = list(
+                            self.user_arango.db.aql.execute(
+                                'FOR doc IN chats FILTER doc[@field] == @value '
+                                'RETURN {"_key": doc["_key"], "name": doc["name"]}',
+                                bind_vars={"field": filter_field, "value": str(filter_value)},
+                            )
+                        )
+
+                    chats = {i["name"]: i["_key"] for i in chat_history}
+                    selected_chat = st.selectbox(
+                        "Continue another chat", options=[""] + list(chats), index=None
+                    )
+
+                    if not self.role:
+                        # Was a no-op comparison (`==`); the default must be assigned.
+                        self.role = "Research Assistant"
+
+                    start_chat = st.form_submit_button("Start Chat")
+                    if start_chat:
+                        if selected_chat:
+                            st.session_state["chat_key"] = chats[selected_chat]
+                            self.chat = self.get_chat()
+                        else:
+                            self.chat = self.get_chat(role=self.role, new_chat=True)
+                        st.rerun()
+
+    def remove_old_unsaved_chats(self):
+        """
+        Delete chats that are not saved and were last updated more than two weeks ago.
+
+        The cutoff is compared against ``doc.last_updated`` as an ISO-8601 string
+        (assumes ``last_updated`` is stored in the same format — TODO confirm).
+        """
+        two_weeks_ago = datetime.now() - timedelta(weeks=2)
+        old_chats = self.user_arango.db.aql.execute(
+            "FOR doc IN chats FILTER doc.saved == false AND doc.last_updated < @cutoff RETURN doc",
+            bind_vars={"cutoff": two_weeks_ago.isoformat()},
+        )
+        chats_collection = self.user_arango.db.collection("chats")
+        for chat in old_chats:
+            print_red(chat["_id"])
+            chats_collection.delete(chat["_key"])
+
--- a/collections_page.py
+++ b/collections_page.py
@ -11,7 +11,7 @@ from colorprinter.print_color import *
 class ArticleCollectionsPage(StreamlitBaseClass):
    def __init__(self, username: str):
        super().__init__(username=username)
-        self.collection = None
+        self.collection = self.get_settings()["current_collection"]
        self.page_name = "Article Collections"

        # Initialize attributes from session state if available
@ -49,6 +49,7 @@ class ArticleCollectionsPage(StreamlitBaseClass):
    def choose_collection(self):
        collections = self.get_article_collections()
        current_collection = self.collection
+        print_yellow(f"Current collection: {current_collection}")
        preselected = (
            collections.index(current_collection)
            if current_collection in collections
@ -143,26 +144,33 @@ class ArticleCollectionsPage(StreamlitBaseClass):
            )
            collection_articles += list(cursor)

-        # Sort articles by title
+        # Filter out None values and sort articles by title
        collection_articles = sorted(
-            collection_articles,
-            key=lambda x: x.get("metadata", {}).get("title", "No Title"),
+            [article for article in collection_articles if article is not None],
+            key=lambda x: (
+                x.get("metadata", {}).get("title", "No Title")
+                if x.get("metadata") is not None
+                else "No Title"
+            ),
        )
-
        if collection_articles:
            st.markdown(f"#### Articles in *{self.collection}*:")
            for article in collection_articles:
                if article is None:
                    continue
-                metadata = article.get("metadata")
-                if metadata is None:
-                    continue
-
-                title = metadata.get("title", "No Title").strip()
-                journal = metadata.get("journal", "No Journal").strip()
-                published_year = metadata.get("published_year", "No Year")
-                published_date = metadata.get("published_date", None)
-                language = metadata.get("language", "No Language")
+                metadata = article.get("metadata", {})
+                if metadata:
+                    title = metadata.get("title", "No Title").strip()
+                    journal = metadata.get("journal", "No Journal").strip()
+                    published_year = metadata.get("published_year", "No Year")
+                    published_date = metadata.get("published_date", None)
+                    language = metadata.get("language", "No Language")
+                else:
+                    title = "No Title"
+                    journal = "No Journal"
+                    published_year = "No Year"
+                    published_date = None
+                    language = "No Language"
                icon = country_emojis.get(language.upper(), "") if language else ""

                expander_title = f"**{title}** *{journal}* ({published_year}) {icon}"
@ -202,9 +210,7 @@ class ArticleCollectionsPage(StreamlitBaseClass):
                    # Let the user add notes to the article, if it's not a scientific article
                    # if not article._id.startswith("sci_articles"):
                    if "user_notes" in article and article["user_notes"]:
-                        st.markdown(
-                            f":blue[**Your notes:**]"
-                        )
+                        st.markdown(f":blue[**Your notes:**]")
                        note_number = 0
                        for note in article["user_notes"]:
                            note_number += 1
@ -212,17 +218,20 @@ class ArticleCollectionsPage(StreamlitBaseClass):
                            with c1:
                                st.markdown(f":blue[{note}]")
                            with c2:
-                                st.button(key=f'{article["_key"]}_{note_number}',
+                                st.button(
+                                    key=f'{article["_key"]}_{note_number}',
                                    label=f":red[Delete note]",
                                    on_click=self.delete_article_note,
                                    args=(article, note),
                                )

-                    with st.form(f"add_info_form_{article['_id']}", clear_on_submit=True):
+                    with st.form(
+                        f"add_info_form_{article['_id']}", clear_on_submit=True
+                    ):
                        new_info = st.text_area(
                            ":blue[Add a note about the article]",
                            key=f'new_info_{article["_id"]}',
-                            help="Add information such as what kind of article it is, what it's about, who's the author, etc.", 
+                            help="Add information such as what kind of article it is, what it's about, who's the author, etc.",
                        )
                        submitted = st.form_submit_button(":blue[Add note]")
                        if submitted:
@ -234,7 +243,38 @@ class ArticleCollectionsPage(StreamlitBaseClass):
                        on_click=self.delete_article,
                        args=(self.collection, article["_id"]),
                    )
-                
+                    # Add info button and form
+
+                    st.markdown(":grey[Change metadata]")
+                    with st.form(f"update_metadata_form_{article['_id']}", clear_on_submit=True):
+
+                        new_title = st.text_input(
+                            ":blue[Update title]",
+                            key=f'new_metadata_{article["_id"]}_title',
+                            help="Update the title of the article.",
+                        )
+                        new_author = st.text_input(
+                            ":blue[Update author]",
+                            key=f'new_metadata_{article["_id"]}_author',
+                            help="Update the author of the article.",
+                        )
+                        new_journal = st.text_input(
+                            ":blue[Update journal]",
+                            key=f'new_metadata_{article["_id"]}_journal',
+                            help="Update the journal of the article.",
+                        )
+                        new_published_year = st.text_input(
+                            ":blue[Update published year]",
+                            key=f'new_metadata_{article["_id"]}_published_year',
+                            help="Update the published year of the article.",
+                        )
+                        submitted_metadata = st.form_submit_button(":blue[Add info]")
+                        if submitted_metadata:
+                            for info in ['new_title', 'new_author', 'new_journal', 'new_published_year']:
+                                if info:
+                                    self.update_article(article, "metadata", info)
+
+
        else:
            st.write("No articles in this collection.")

@ -389,10 +429,25 @@ class ArticleCollectionsPage(StreamlitBaseClass):
            self.update_session_state(page_name=self.page_name)

    def update_article(self, article, field, value):
-        "Update a field in an article document"
+        """
+        Update a specified field in an article with a new value.
+
+        If the field already exists and is a list, the new value is appended to the list.
+        If the field exists but is not a list, the field is converted to a list containing
+        the old and new values. If the field does not exist, it is created as a list with
+        the new value.
+
+        Args:
+            article (dict): The article to be updated.
+            field (str): The field in the article to be updated.
+            value (str): The new value to be added to the field.
+
+        Returns:
+            None
+        """
+
        value = str(value.strip())
-        print(value)
-        print(type(value))
+
        if field in article:
            if isinstance(article[field], list):
                article[field].append(value)
@ -409,4 +464,4 @@ class ArticleCollectionsPage(StreamlitBaseClass):
        if "user_notes" in article and note in article["user_notes"]:
            article["user_notes"].remove(note)
            self.user_arango.db.update_document(article, check_rev=False, silent=True)
-            sleep(0.1)
+            sleep(0.1)
--- a/feed_page.py
+++ b/feed_page.py
@ -0,0 +1,103 @@
+from _rss import RSSReader
+import streamlit as st
+from _base_class import StreamlitBaseClass
+from colorprinter.print_color import *
+
+class RSSFeedsPage(StreamlitBaseClass):
+    def __init__(self, username: str):
+        """
+        Create the RSS page for ``username`` and restore attributes saved for it.
+
+        Args:
+            username (str): Name of the current user; also passed to the RSSReader.
+        """
+        super().__init__(username=username)
+        self.page_name = "RSS Feeds"
+        self.reader = RSSReader(username=username)
+        # Re-apply whatever attributes an earlier run stored under this page's name.
+        saved_state = st.session_state.get(self.page_name, {})
+        for attr_name, attr_value in saved_state.items():
+            setattr(self, attr_name, attr_value)
+
+    def run(self):
+        """Render the RSS page: the selected feed first, then the sidebar controls."""
+        # display_feed() indexes st.session_state["selected_feed"], so the key must exist.
+        if "selected_feed" not in st.session_state:
+            st.session_state["selected_feed"] = None
+        self.update_current_page(self.page_name)
+        self.display_feed()
+        self.sidebar_actions()
+        self.update_session_state(page_name=self.page_name)
+
+    def select_rss_feeds(self):
+        """
+        Show a sidebar selectbox with the user's feeds and remember the choice.
+
+        The key of the first feed whose title matches the selection is stored in
+        ``st.session_state["selected_feed"]``. The script is rerun only when that
+        value actually changes: the selectbox keeps its value across reruns, so an
+        unconditional ``st.rerun()`` would restart the script on every run.
+        """
+        rss_feeds = self.reader.get_rss_feeds()
+        if not rss_feeds:
+            st.write("You have no RSS feeds added.")
+            return
+
+        feed_options = [feed["title"] for feed in rss_feeds]
+        with st.sidebar:
+            st.subheader("Show your feeds")
+            selected_feed_title = st.selectbox(
+                "Select a feed", options=feed_options, index=None
+            )
+            if selected_feed_title:
+                feed_key = next(
+                    feed["_key"]
+                    for feed in rss_feeds
+                    if feed["title"] == selected_feed_title
+                )
+                if st.session_state.get("selected_feed") != feed_key:
+                    st.session_state["selected_feed"] = feed_key
+                    st.rerun()
+
+    def search_feeds(self, rss_url):
+        """
+        Discover feeds at ``rss_url`` and store them in the session state.
+
+        Shows a spinner while the reader searches. If nothing is found, an error
+        is displayed and the session state is left untouched.
+        """
+        with st.spinner("Discovering feeds..."):
+            discovered = self.reader.discover_feeds(rss_url)
+            if not discovered:
+                st.error("No RSS feeds found at the provided URL.")
+                return
+            st.session_state["discovered_feeds"] = discovered
+
+    def sidebar_actions(self):
+        """
+        Render the sidebar: feed selection, feed discovery, preview and adding.
+
+        Discovered feeds are kept in ``st.session_state["discovered_feeds"]`` so the
+        selection widgets survive reruns; the entry is deleted once a feed is added.
+        """
+        if "discovered_feeds" not in st.session_state:
+            st.session_state["discovered_feeds"] = None
+
+        with st.sidebar:
+            self.select_rss_feeds()
+            st.subheader("Add a New RSS Feed")
+            with st.form("add_rss_feed"):
+                rss_url = st.text_input("Website URL or RSS Feed URL")
+                if st.form_submit_button("Discover Feeds"):
+                    print_green(rss_url)
+                    st.session_state["discovered_feeds"] = self.reader.discover_feeds(rss_url)
+
+            discovered = st.session_state["discovered_feeds"]
+            if discovered:
+                st.subheader("Select a Feed to Add")
+                labels = [f"{feed['title']} ({feed['href']})" for feed in discovered]
+                chosen_label = st.selectbox("Available Feeds", options=labels)
+                # Map the chosen label back to its feed by position.
+                selected_feed_url = discovered[labels.index(chosen_label)]["href"]
+
+                if st.button("Preview Feed"):
+                    preview = self.reader.parse_feed(selected_feed_url)
+                    st.write(f"{preview.title}")
+                    st.write(f"_{self.reader.html_to_markdown(preview.description)}_")
+                    # Only the first five entries are previewed.
+                    for entry in preview.entries[:5]:
+                        with st.expander(entry["title"]):
+                            raw_summary = entry.get("summary", "No summary available")
+                            st.markdown(self.reader.html_to_markdown(raw_summary))
+                    print_yellow(selected_feed_url)
+
+                # The feed itself is added by the on_click callback; the branch
+                # below only cleans up and refreshes the page afterwards.
+                feed_added = st.button(
+                    "Add RSS Feed",
+                    on_click=self.reader.add_rss_feed,
+                    args=[selected_feed_url],
+                )
+                if feed_added:
+                    del st.session_state["discovered_feeds"]
+                    st.success("RSS Feed added.")
+                    st.rerun()
+
+    def display_feed(self):
+        """Show title, description and the first five entries of the selected feed."""
+        selected_key = st.session_state["selected_feed"]
+        if not selected_key:
+            return
+
+        self.reader.get_feed(selected_key)
+        st.title(self.reader.feed.title)
+        st.write(f"_{self.reader.feed.description}_")
+        for entry in self.reader.feed.entries[:5]:
+            with st.expander(entry["title"]):
+                raw_summary = entry.get("summary", "No summary available")
+                st.markdown(self.reader.html_to_markdown(raw_summary))
+                st.markdown(f"[Read more]({entry['link']})")
--- a/ollama_response_classes.py
+++ b/ollama_response_classes.py
@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+# Pydantic model with two required string fields.
+# NOTE(review): presumably used as the structured-output schema for an LLM call
+# (the module name suggests Ollama) — confirm against the callers.
+# A class docstring is deliberately not used: pydantic would add it to the
+# generated JSON schema as the model description.
+class QueryResponse(BaseModel):
+    query_to_vector_database: str  # presumably the query text for the vector database — confirm
+    short_explanation: str  # presumably a brief rationale for the query — confirm
+    
--- a/projects_page.py
+++ b/projects_page.py
@ -20,15 +20,13 @@ from prompts import get_note_summary_prompt, get_image_system_prompt
 import env_manager

 env_manager.set_env()
-print_green("Environment variables set.")
-

 class ProjectsPage(StreamlitBaseClass):
    def __init__(self, username: str):
        super().__init__(username=username)
        self.projects = []
        self.selected_project_name = None
-        self.project = None
+        self.project = self.get_settings("current_project")
        self.page_name = "Projects"

        # Initialize attributes from session state if available
@ -52,9 +50,11 @@ class ProjectsPage(StreamlitBaseClass):
    def display_projects(self):
        with st.sidebar:
            self.new_project_button()
+            projects = [proj["name"] for proj in self.projects]
            self.selected_project_name = st.selectbox(
                "Select a project to manage",
                options=[proj["name"] for proj in self.projects],
+                index=projects.index(self.project) if self.project in projects else None,
            )
        if self.selected_project_name:
            self.project = Project(
@ -136,10 +136,12 @@ class ProjectsPage(StreamlitBaseClass):
            if interviews:
                for interview in interviews:
                    st.markdown(f'_{interview.get("timestamp", "")}_')
-                    st.markdown(
-                        f"**Interviewees:** {', '.join(interview['intervievees'])}"
-                    )
-                    st.markdown(f"**Interviewer:** {interview['interviewer']}")
+                    if interview['intervievees']:
+                        st.markdown(
+                            f"**Interviewees:** {', '.join(interview['intervievees'])}"
+                        )
+                    if interview['interviewer']:
+                        st.markdown(f"**Interviewer:** {interview['interviewer']}")
                    if len(interview["transcript"].split("\n")) > 6:
                        preview = (
                            "  \n".join(interview["transcript"].split("\n")[:6])
@ -261,7 +263,7 @@ class ProjectsPage(StreamlitBaseClass):
    def upload_interview_form(self):
        with st.expander("Upload interview"):
            with st.form("add_interview", clear_on_submit=True):
-                interview = st.file_uploader("Upload interview audio file")
+                interview = st.file_uploader("Upload interview audio file or transcript")
                interviewees = st.text_input(
                    "Enter the names of the interviewees, separated by commas"
                )
@ -398,6 +400,7 @@ class Project(StreamlitBaseClass):

    def load_project(self):
        print_blue("Project name:", self.name)
+
        project_cursor = self.user_arango.db.aql.execute(
            "FOR doc IN projects FILTER doc.name == @name RETURN doc",
            bind_vars={"name": self.name},
@ -415,6 +418,7 @@ class Project(StreamlitBaseClass):
        self.settings = project.get("settings", {})
        self.notes_summary = project.get("notes_summary", "")

+
    def update_project(self):
        updated_doc = {
            "_key": self._key,
@ -492,9 +496,27 @@ class Project(StreamlitBaseClass):
                document_type="interview",
                is_image=False,
            )
-        elif interview.type in ["plain/text"]:
-            # TODO Implement text file processing
-            pass
+        
+        elif interview.type in ["application/json", "text/plain"]:
+            import json
+            print_purple("JSON file processing")
+            interview_content = interview.getvalue().decode("utf-8")
+            print('Content:', interview_content)
+            interview_json = json.loads(interview_content)
+            formated_transcription = self.format_json_transcription(interview_json)
+            self.add_interview_transcript(
+                formated_transcription,
+                interview.name,
+                intervievees=None,
+                interviewer=None,
+                date_of_interveiw=None
+            )
+        else:
+            print(interview.type)
+            st.error("Unsupported file type")
+            st.stop()
+
+        st.rerun()

    def add_interview_transcript(
        self,
@ -504,7 +526,6 @@ class Project(StreamlitBaseClass):
        interviewer: str = None,
        date_of_interveiw: datetime.date = None,
    ):
-        print_yellow(transcript)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
        _key = fix_key(f"{filename}_{timestamp}")
        if intervievees:
@ -515,7 +536,7 @@ class Project(StreamlitBaseClass):
            interviewer = self.username
        if not self.user_arango.db.has_collection("interviews"):
            self.user_arango.db.create_collection("interviews")
-        if date_of_interveiw:
+        if isinstance(date_of_interveiw, str):
            date_of_interveiw = datetime.strptime(date_of_interveiw, "%Y-%m-%d")

        from article2db import Document
@ -529,8 +550,7 @@ class Project(StreamlitBaseClass):
            username=self.username,
            arango_collection="interviews",
        )
-        print_rainbow(document.__dict__)
-        print(document.text)
+
        document.make_chunks(len_chunks=600)

        self.user_arango.db.collection("interviews").insert(
@ -619,6 +639,32 @@ class Project(StreamlitBaseClass):
                timestamp = None
        return "\n".join(transcript)

+    def format_json_transcription(self, transcription: "list | dict") -> str:
+        """
+        Format a parsed JSON transcription as plain text, one line per entry.
+
+        Two layouts are handled (named after the MacWhisper export formats):
+        - JSON format: a list of entries with "timestamp" and "text" keys.
+        - DOT format: a dict whose "lines" key holds entries with
+          "startTime" and "text" keys.
+        An entry may carry an optional "speaker" key, which is placed between
+        the timestamp and the text.
+
+        Args:
+            transcription: The parsed JSON content (a list or a dict).
+
+        Returns:
+            str: Lines of the form "[timestamp] speaker: text" (or
+            "[timestamp] text" without a speaker), joined with newlines.
+            An empty string if the input is neither a list nor a dict with a
+            "lines" key.
+
+        Raises:
+            KeyError: If an entry lacks its timestamp key or its "text" key.
+        """
+        if isinstance(transcription, list):
+            # For the JSON format in MacWhisper
+            entries, time_key = transcription, "timestamp"
+        elif isinstance(transcription, dict) and "lines" in transcription:
+            # For the DOT format in MacWhisper
+            entries, time_key = transcription["lines"], "startTime"
+        else:
+            # Unknown layout: nothing to format.
+            entries, time_key = [], "timestamp"
+
+        transcript = []
+        for entry in entries:
+            timestamp = entry[time_key]
+            text = entry["text"]
+            speaker = entry.get("speaker")
+            if speaker:
+                transcript.append(f"[{timestamp}] {speaker}: {text}")
+            else:
+                transcript.append(f"[{timestamp}] {text}")
+        return "\n".join(transcript)
+
+
    def delete_note(self, note_id):
        if note_id in self.notes:
            self.notes.remove(note_id)
@ -727,7 +773,7 @@ class Project(StreamlitBaseClass):
            return None

        try:
-            page = wikipedia.page(page_name)
+            page = wikipedia.page(page_name, auto_suggest=False)
            data = {
                "title": page.title,
                "summary": page.summary,
@ -747,7 +793,7 @@ class Project(StreamlitBaseClass):
        )
        if wiki_data.get("summary"):
            query = f'''Summarize the text below. It's from a Wikipedia page about {wiki_data["title"]}. \n\n"""{wiki_data['summary']}"""\nMake a detailed and concise summary of the text.'''
-            summary = llm.generate(query)
+            summary = llm.generate(query).content
        wiki_data["text"] = (
            f"(_Summarised using AI, read original [here]({wiki_url})_)\n{summary}"
        )
@ -759,12 +805,15 @@ class Project(StreamlitBaseClass):
        self.add_note(wiki_data)

        processor = PDFProcessor(process=False)
-        dois = [
-            processor.extract_doi(ref)
-            for ref in wiki_data.get("references", [])
-            if processor.extract_doi(ref)
-        ]
-        if dois:
+        dois = []
+        print_rainbow(wiki_data.get("references", []))
+        for ref in wiki_data.get("references", []):
+            doi = processor.extract_doi(ref)
+            if doi:
+                print_blue("Found DOI:", doi)
+                dois.append(doi)
+
+        if len(dois) > 0:
            current_collection = st.session_state["settings"].get("current_collection")
            st.markdown(
                f"Found {len(dois)} references with DOI numbers. Do you want to add them to {current_collection}?"
--- a/prompts.py
+++ b/prompts.py
@ -139,7 +139,8 @@ def get_query_builder_system_message():
        Take the user input and write it as a sentence that could be used as a query for a vector database. 
        The vector database will return text snippets that semantically match the query, so you CAN'T USE NEGATIONS or other complex language constructs. If there is a negation in the user input, exclude that part from the query. 
        If the user input seems to be a follow-up question or comment, use the context from the chat history to make a relevant query. 
-        Answer ONLY with the query, no explanation or reasoning!
+        Remember that the query is meant to return information on a specific topic, so make sure the query is focused on that topic. Don't make a query to search for tools or methods (if it's not information about a specific method, e.g. a scientific method), only for actual information.
+        Answer ONLY with the query, NO explanation or reasoning!
        """
    return re.sub(r"\s*\n\s*", "\n", system_message)
    
@ -171,8 +172,8 @@ def get_image_system_prompt(project):

 def get_tools_prompt(user_input):
    prompt = f'''User message: "{user_input}" 
-    You have to choose one or many tools in order to answer the message. It's important that you think of what information (if any) is needed to make a good answer. 
-    Make sure to read the description of the tools carefully before choosing!
+    You have to choose one or many tools in order to answer the message. It's important that you think of what information is needed to make a good answer. 
+    Make sure to read the description of the tools carefully before choosing! E.g. chose the conversational response tool ONLY if the user is small talking or asking, use other tools if the user is asking a question or want information.
    You can ONLY chose a tool you are provided with, don't make up a tool!
    You HAVE TO CHOOSE A TOOL, even if you think you can answer without it. Don't answer the question without choosing a tool.
    '''
@ -203,10 +204,11 @@ def get_summary_prompt(text, is_sci):

 def get_generate_vector_query_prompt(user_input: str, role: str):
    print(role.upper())
-    if role in ["Research Assistant", "Editor"]:
-        query = f"""A user asked this question: "{user_input}". Generate a query for the vector database. Make sure to follow the instructions you got earlier!"""
-    elif role == "Guest":
-        query = f"""A podcast host has asked this question in an interview: "{user_input}". Generate a query for the vector database to answer the actial question. Make sure to follow the instructions you got earlier!"""
+    if role == "Guest":
+        query = f"""A podcast host has asked this question in an interview: "{user_input}". Generate a query for the vector database to answer the actial question."""
    elif role == "Host":
-        query = f"""An expert has stated: "{user_input}". Generate a query for the vector database to get context for that answer in order to come up with a new question. Make sure to follow the instructions you got earlier!"""
+        query = f"""An expert has stated: "{user_input}". Generate a query for the vector database to get context for that answer in order to come up with a new question."""
+    else:
+        query = f"""A user asked this question: "{user_input}". Generate a query for the vector database"""
+    query += "\nMake sure to follow the instructions you got earlier!"
    return query
--- a/research_page.py
+++ b/research_page.py
@ -0,0 +1,406 @@
+import streamlit as st
+from datetime import datetime
+from colorprinter.print_color import *
+
+from _base_class import StreamlitBaseClass
+from projects_page import Project
+from agent_research import ResearchReport, MasterAgent, StructureAgent, ToolAgent, ArchiveAgent, process_step
+import os
+import json
+
+
+class ResearchPage(StreamlitBaseClass):
+    """
+    ResearchPage - A Streamlit interface for deep research using AI agents.
+
+    This class provides a user interface for conducting in-depth research using
+    multiple specialized AI agents working together. It allows users to input
+    research questions, track progress, and view detailed research reports.
+
+    Attributes:
+        REPORTS_DIR (str): Directory where reports are saved and listed from.
+        username (str): The username of the current user.
+        project_name (str): Name of the selected project.
+        project (Project): Project instance the research is associated with.
+        page_name (str): Name of the current page ("Research").
+        research_state (dict): Dictionary tracking the current state of research.
+        report (ResearchReport): Instance for tracking research progress and results.
+
+    Methods:
+        run(): Main method to render the research interface and handle interactions.
+        sidebar_actions(): Renders sidebar elements for selecting projects and research options.
+        start_new_research(): Initiates a new research session.
+        view_saved_reports(): Displays a list of saved research reports.
+        display_report(): Renders a research report in the Streamlit interface.
+        show_research_progress(): Displays the current research progress.
+    """
+
+    # Single definition of the report directory; it was previously repeated
+    # as a literal in every method that touched report files.
+    REPORTS_DIR = "/home/lasse/sci/reports"
+
+    def __init__(self, username):
+        super().__init__(username=username)
+        self.project_name = None
+        self.project = None
+        self.page_name = "Research"
+
+        # Research state tracking
+        self.research_state = {
+            "in_progress": False,
+            "completed": False,
+            "question": None,
+            "started_at": None,
+            "report": None,
+            "current_step": None,
+            "steps_completed": 0,
+            "total_steps": 0,
+        }
+
+        self.report = None
+
+        # Initialize attributes from session state if available
+        if self.page_name in st.session_state:
+            for k, v in st.session_state[self.page_name].items():
+                setattr(self, k, v)
+
+        # Create reports directory if it doesn't exist
+        os.makedirs(self.REPORTS_DIR, exist_ok=True)
+
+    def _persist_page_state(self):
+        """Store the page attributes in the session state so they survive a rerun."""
+        st.session_state[self.page_name] = {
+            "project_name": self.project_name,
+            "project": self.project,
+            "research_state": self.research_state,
+            "report": self.report,
+        }
+
+    def run(self):
+        """Render the page: sidebar first, then progress, a report or the new-research form."""
+        self.update_current_page("Research")
+        self.sidebar_actions()
+
+        st.title("Deep Research")
+
+        if not self.project:
+            st.warning("Please select a project to start researching.")
+            return
+
+        # Main interface
+        if self.research_state["in_progress"]:
+            self.show_research_progress()
+        elif self.research_state["completed"]:
+            self.display_report(self.research_state["report"])
+        else:
+            # Input for new research
+            st.subheader(f"New Research for Project: {self.project_name}")
+            with st.form("research_form"):
+                question = st.text_area("Enter your research question:", 
+                                       help="Be specific about what you want to research. Complex questions will be broken down into sub-questions.")
+                start_button = st.form_submit_button("Start Research")
+
+                # Ignore submissions that contain only whitespace.
+                question = question.strip() if question else ""
+                if start_button and question:
+                    self.start_new_research(question)
+                    st.rerun()
+
+            # Option to view saved reports
+            with st.expander("View Saved Reports"):
+                self.view_saved_reports()
+
+    def sidebar_actions(self):
+        """Render the project selector and the cancel / restart buttons in the sidebar."""
+        with st.sidebar:
+            with st.form("select_project"):
+                self.project = self.choose_project("Project for research:")
+                submitted = st.form_submit_button("Select Project")
+
+                if self.project:
+                    # Keep the name in sync on every run, not only on the run
+                    # where the form was submitted; otherwise it stays None
+                    # when the research form is submitted later.
+                    self.project_name = self.project.name
+                    if submitted:
+                        st.success(f"Selected project: {self.project_name}")
+
+            if self.research_state["in_progress"]:
+                st.info(f"Research in progress: {self.research_state['question']}")
+                if st.button("Cancel Research"):
+                    self.research_state["in_progress"] = False
+                    self._persist_page_state()
+                    st.rerun()
+
+            elif self.research_state["completed"]:
+                if st.button("Start New Research"):
+                    self.research_state["completed"] = False
+                    self.research_state["report"] = None
+                    self._persist_page_state()
+                    st.rerun()
+
+    def start_new_research(self, question):
+        """
+        Run a complete research session for the given question.
+
+        The workflow (plan, structure, execute steps, evaluate, write report,
+        save files) runs synchronously in the current script run while a
+        spinner is shown. Errors are displayed on the page instead of being
+        raised.
+
+        Args:
+            question (str): The research question to investigate.
+        """
+        # Reset every counter so values from an earlier session cannot leak
+        # into this one (steps_completed used to keep growing across runs).
+        self.research_state.update(
+            {
+                "question": question,
+                "in_progress": True,
+                "completed": False,
+                "started_at": datetime.now().isoformat(),
+                "report": None,
+                "current_step": None,
+                "steps_completed": 0,
+                "total_steps": 0,
+            }
+        )
+
+        # Initialize the research report
+        self.report = ResearchReport(
+            question=question, 
+            username=self.username, 
+            project_name=self.project_name
+        )
+
+        # Save current state
+        self._persist_page_state()
+
+        # The research runs in the main thread under a Streamlit spinner.
+        # In a production environment a background job may be preferable.
+        with st.spinner("Research in progress... This may take several minutes."):
+            try:
+                # Initialize agents
+                master_agent = MasterAgent(
+                    username=self.username, 
+                    project=self.project, 
+                    report=self.report, 
+                    chat=True
+                )
+                structure_agent = StructureAgent(
+                    username=self.username, 
+                    model="small", 
+                    report=self.report
+                )
+                tool_agent = ToolAgent(
+                    username=self.username,
+                    model="tools",
+                    system_message="You are an assistant with tools. Always choose a tool to help with the task.",
+                    report=self.report,
+                    project=self.project,
+                    chat=True
+                )
+                archive_agent = ArchiveAgent(
+                    username=self.username,
+                    report=self.report,
+                    project=self.project,
+                    system_message="You are an assistant specialized in reading and summarizing research information.",
+                    chat=True
+                )
+
+                # Track the research state in the master agent
+                master_agent.research_state["original_question"] = question
+
+                # Execute the research workflow
+                # 1. Create research plan
+                st.text("Creating research plan...")
+                research_plan = master_agent.make_plan(question)
+                self.report.log_plan(research_plan)
+
+                # 2. Structure the plan
+                st.text("Structuring research plan...")
+                structured_plan = structure_agent.make_structured(research_plan, question)
+                self.report.log_plan(research_plan, structured_plan.model_dump())
+
+                # Update total steps count
+                self.research_state["total_steps"] = len(structured_plan.steps)
+
+                # 3. Execute the plan step by step
+                execution_results = {}
+
+                for step_name, tasks in structured_plan.steps.items():
+                    st.text(f"Processing step: {step_name}")
+                    self.research_state["current_step"] = step_name
+
+                    # Collect all task descriptions in this step
+                    step_tasks = [
+                        {"task_name": task_name, "task_description": task_description}
+                        for task_name, task_description in tasks
+                    ]
+
+                    # Process the entire step
+                    step_result = process_step(
+                        step_name, step_tasks, master_agent, tool_agent, archive_agent
+                    )
+                    execution_results[step_name] = step_result
+                    # Count the step only after it has been processed.
+                    self.research_state["steps_completed"] += 1
+
+                # 4. Evaluate if more steps are needed
+                st.text("Evaluating research plan...")
+                plan_evaluation = master_agent.evaluate_plan(execution_results)
+                self.report.log_plan_evaluation(plan_evaluation)
+
+                # 5. Write the final report
+                st.text("Writing final report...")
+                final_report = master_agent.write_report(execution_results)
+                self.report.log_final_report(final_report)
+
+                # 6. Save the reports
+                timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+                report_path = f"{self.REPORTS_DIR}/research_report_{self.username}_{timestamp}"
+
+                # Save JSON report (explicit encoding: the platform default
+                # may not be able to represent the report text).
+                json_path = f"{report_path}.json"
+                with open(json_path, "w", encoding="utf-8") as f:
+                    json.dump(self.report.get_full_report(), f, indent=2)
+
+                # Save markdown report
+                markdown_report = self.report.get_markdown_report()
+                markdown_path = f"{report_path}.md"
+                with open(markdown_path, "w", encoding="utf-8") as f:
+                    f.write(markdown_report)
+
+                # Update research state
+                self.research_state["in_progress"] = False
+                self.research_state["completed"] = True
+                self.research_state["report"] = {
+                    "json_path": json_path,
+                    "markdown_path": markdown_path,
+                    "report_data": self.report.get_full_report(),
+                    "markdown_content": markdown_report
+                }
+
+            except Exception as e:
+                # UI boundary: show the failure on the page rather than crash it.
+                st.error(f"An error occurred during research: {str(e)}")
+                import traceback
+                st.code(traceback.format_exc())
+                self.research_state["in_progress"] = False
+
+        # Update session state
+        self._persist_page_state()
+
+    def view_saved_reports(self):
+        """
+        List the saved research reports with a button to open each one.
+
+        Opening a report marks the research state as completed, stores the
+        loaded report in it and reruns the script so that run() displays it.
+        """
+        reports_dir = self.REPORTS_DIR
+        if not os.path.exists(reports_dir):
+            st.info("No saved reports found.")
+            return
+
+        # Get all report files
+        # NOTE(review): this lists the reports of every user, although the
+        # file names contain the username - confirm whether that is intended.
+        json_files = [f for f in os.listdir(reports_dir) if f.endswith('.json') and f.startswith('research_report')]
+
+        if not json_files:
+            st.info("No saved reports found.")
+            return
+
+        for file in sorted(json_files, reverse=True):
+            file_path = os.path.join(reports_dir, file)
+            try:
+                with open(file_path, 'r', encoding="utf-8") as f:
+                    report_data = json.load(f)
+
+                # Extract basic info
+                metadata = report_data.get("metadata", {})
+                question = metadata.get("question", "Unknown question")
+                project = metadata.get("project_name", "No project")
+                started_at = metadata.get("started_at", "Unknown time")
+
+                # Format the date; fall back to the raw value when it is not
+                # an ISO string (ValueError) or not a string at all (TypeError).
+                try:
+                    date_obj = datetime.fromisoformat(started_at)
+                    date_str = date_obj.strftime("%Y-%m-%d %H:%M")
+                except (TypeError, ValueError):
+                    date_str = started_at
+
+                # Summary lines for the report
+                st.markdown(f"_{question} ({project} - {date_str})_")
+                st.markdown(f"**Project:** {project}")
+                st.markdown(f"**Date:** {date_str}")
+
+                # Button to view full report
+                if st.button("View Full Report", key=f"view_{file}"):
+                    # Load corresponding markdown file if it exists
+                    md_path = os.path.join(reports_dir, os.path.splitext(file)[0] + ".md")
+                    md_exists = os.path.exists(md_path)
+
+                    if md_exists:
+                        with open(md_path, 'r', encoding="utf-8") as f:
+                            markdown_content = f.read()
+                    else:
+                        markdown_content = None
+
+                    self.research_state["completed"] = True
+                    self.research_state["report"] = {
+                        "json_path": file_path,
+                        "markdown_path": md_path if md_exists else None,
+                        "report_data": report_data,
+                        "markdown_content": markdown_content
+                    }
+                    # Without this the selection is lost on rerun whenever no
+                    # research has been started in this session, because the
+                    # page object is rebuilt from the session state.
+                    self._persist_page_state()
+                    st.rerun()
+
+            except Exception as e:
+                st.error(f"Error loading report {file}: {str(e)}")
+
+    def display_report(self, report_data):
+        """
+        Render a research report and its download buttons.
+
+        Args:
+            report_data (dict): Report entry with the keys "markdown_content",
+                "report_data", "markdown_path" and "json_path". The markdown
+                content is shown when present; otherwise the JSON data is
+                rendered section by section.
+        """
+        if not report_data:
+            st.warning("No report data available.")
+            return
+
+        st.title("Research Report")
+
+        # Get report data
+        markdown_content = report_data.get("markdown_content")
+        json_data = report_data.get("report_data")
+
+        if markdown_content:
+            # Display the markdown report
+            st.markdown(markdown_content)
+        elif json_data:
+            # Fallback to displaying JSON data in a more readable format
+            metadata = json_data.get("metadata", {})
+            question = metadata.get("question", "Unknown question")
+            st.header(f"Research on: {question}")
+
+            # Display metadata
+            st.subheader("Metadata")
+            st.markdown(f"**Project:** {metadata.get('project_name', 'None')}")
+            st.markdown(f"**Started:** {metadata.get('started_at', 'Unknown')}")
+            st.markdown(f"**Finished:** {metadata.get('finished_at', 'Unknown')}")
+
+            # Display final report
+            st.subheader("Research Findings")
+            st.markdown(json_data.get("final_report", "No final report available."))
+
+            # Display steps
+            st.subheader("Research Steps")
+            steps = json_data.get("steps", {})
+            for step_name, step_data in steps.items():
+                with st.expander(step_name):
+                    st.markdown(f"**Summary:** {step_data.get('summary', 'No summary available.')}")
+
+                    # Display tools used
+                    st.markdown("**Tools used:**")
+                    for tool in step_data.get("tools_used", []):
+                        st.markdown(f"- {tool.get('tool', 'Unknown tool')} with query: _{tool.get('args', {}).get('query', 'No query')}_")
+
+        else:
+            st.error("No report content available to display.")
+
+        # Download buttons
+        col1, col2 = st.columns(2)
+        with col1:
+            markdown_path = report_data.get("markdown_path")
+            if markdown_path and os.path.exists(markdown_path):
+                with open(markdown_path, "r", encoding="utf-8") as f:
+                    markdown_file_content = f.read()
+                st.download_button(
+                    label="Download as Markdown",
+                    data=markdown_file_content,
+                    file_name=os.path.basename(markdown_path),
+                    mime="text/markdown"
+                )
+
+        with col2:
+            json_path = report_data.get("json_path")
+            if json_path and os.path.exists(json_path):
+                with open(json_path, "r", encoding="utf-8") as f:
+                    json_content = f.read()
+                st.download_button(
+                    label="Download as JSON",
+                    data=json_content,
+                    file_name=os.path.basename(json_path),
+                    mime="application/json"
+                )
+
+    def show_research_progress(self):
+        """Display the question, a progress bar and the current step of the running research."""
+        st.subheader("Research in Progress")
+        st.markdown(f"**Question:** {self.research_state['question']}")
+
+        # Show progress bar
+        progress = 0
+        total_steps = self.research_state["total_steps"]
+        if total_steps > 0:
+            # Clamp to 1.0 so the value stays in the 0.0-1.0 range the
+            # progress bar is given for float input.
+            progress = min(self.research_state["steps_completed"] / total_steps, 1.0)
+
+        st.progress(progress)
+
+        # Show current step. The key always exists (initially None), so a
+        # .get() default would never apply; use "or" for the fallback.
+        current_step = self.research_state.get("current_step") or "Planning"
+        st.markdown(f"**Current step:** {current_step}")
+
+        st.info("Research is ongoing. This may take several minutes depending on the complexity of the question.")
+        st.warning("Please do not navigate away from this page while research is in progress.")
--- a/settings_page.py
+++ b/settings_page.py
@ -0,0 +1,55 @@
+
+import streamlit as st
+from time import sleep
+from colorprinter.print_color import *
+
+
+from _base_class import StreamlitBaseClass
+
+
+
+class SettingsPage(StreamlitBaseClass):
+    """
+    Streamlit page where the user manages personal settings.
+
+    The page offers a profile picture upload and a checkbox for using the
+    reasoning model in chats.
+    """
+
+    def __init__(self, username: str):
+        super().__init__(username=username)
+
+    def run(self):
+        """Render the settings page."""
+        self.update_current_page("Settings")
+        self.set_profile_picture()
+        self.use_reasoning_model()
+
+    def set_profile_picture(self):
+        """
+        Let the user upload a profile picture and store it as the avatar.
+
+        The upload is shrunk to fit within 64x64 pixels, saved as
+        user_data/<username>/profile_picture.png, and that path is stored in
+        the "avatar" setting.
+
+        Returns:
+            None
+        """
+        st.markdown("Profile picture")
+        profile_picture = st.file_uploader(
+            "Upload profile picture", type=["png", "jpg", "jpeg"]
+        )
+        if profile_picture:
+            import os
+
+            from PIL import Image
+
+            # Resize the image to fit within 64x64 pixels
+            img = Image.open(profile_picture)
+            img.thumbnail((64, 64))
+            user_dir = f"user_data/{st.session_state['username']}"
+            # The user folder may not exist yet; saving would fail without it.
+            os.makedirs(user_dir, exist_ok=True)
+            img_path = f"{user_dir}/profile_picture.png"
+            img.save(img_path)
+            self.update_settings("avatar", img_path)
+            st.success("Profile picture uploaded")
+            sleep(1)
+
+    def use_reasoning_model(self):
+        """
+        Displays a checkbox in the Streamlit interface to enable or disable the reasoning model for generating responses in chats.
+
+        Retrieves the current settings and treats a missing "use_reasoning_model" key as False.
+        Then, it displays a markdown text and a checkbox for the user to toggle the reasoning model usage.
+        The setting is saved when the key was missing or the checkbox value differs from the stored one.
+
+        Returns:
+            None
+        """
+        settings = self.get_settings()
+        key_missing = "use_reasoning_model" not in settings
+        current_value = False if key_missing else settings["use_reasoning_model"]
+        st.markdown("Use Reasoning Model")
+
+        use_reasoning_model = st.checkbox("Use Reasoning Model", value=current_value, help="Use the reasoning model to generate responses in chats. This may take longer to process.")
+        # This method runs on every script rerun; only save when the key has
+        # to be initialized or the user actually changed the value.
+        if key_missing or use_reasoning_model != current_value:
+            self.update_settings("use_reasoning_model", use_reasoning_model)
+
--- a/streamlit_app.py
+++ b/streamlit_app.py
@ -59,7 +59,8 @@ if st.session_state["authentication_status"]:
                Bot_Chat,
                Projects,
                Settings,
-                RSS_Feeds
+                RSS_Feeds,
+                Research
            )

            break
@ -85,8 +86,11 @@ if st.session_state["authentication_status"]:
    article_collections = st.Page(Article_Collections)
    settings = st.Page(Settings)
    rss_feeds = st.Page(RSS_Feeds)
+    research = st.Page(Research)
+
+
    sleep(0.1)
-    pg = st.navigation([bot_chat, projects, article_collections, rss_feeds, settings])
+    pg = st.navigation([bot_chat, projects, article_collections, research, rss_feeds, settings])
    sleep(0.1)
    pg.run()
    # try: #TODO Use this when in production
--- a/streamlit_chatbot.py
+++ b/streamlit_chatbot.py
@ -5,15 +5,70 @@ from _llm import LLM
 from prompts import *
 from colorprinter.print_color import *
 from ollama._types import Message as OllamaMessage
-from ollama._types import ChatResponse as OllamaChatResponse
 from projects_page import Project
+from ollama_response_classes import QueryResponse


 class Chat(StreamlitBaseClass):
-    def __init__(self, username=None, role=None, key=None, **kwargs):
+    """
+    A class to represent a chat session in a Streamlit application.
+
+    Attributes:
+    -----------
+    name : str
+        The name of the chat.
+    chat_history : list
+        A list to store the chat history.
+    role : str
+        The role of the user in the chat.
+    project : str
+        The project associated with the chat.
+    collection : str
+        The collection associated with the chat.
+    _key : str
+        The unique key for the chat.
+
+    Methods:
+    --------
+    add_message(role, content):
+        Adds a message to the chat history.
+    
+    to_dict():
+        Converts the chat object to a dictionary.
+    
+    update_in_arango():
+        Updates the chat object in the ArangoDB.
+    
+    set_name(user_input):
+        Sets the name of the chat based on user input.
+    
+    show_title(title=None):
+        Displays the title of the chat in the Streamlit application.
+    
+    from_dict(data):
+        Creates a Chat object from a dictionary.
+    
+    chat_history2bot(n_messages=None, remove_system=False):
+        Converts the chat history to a format suitable for a bot.
+    """
+    def __init__(
+        self,
+        username=None,
+        role=None,
+        key=None,
+        project=None,
+        collection=None,
+        **kwargs,
+    ):
+        """
+        Initialize the chat.
+
+        Args:
+            username: Passed on to the base class initializer.
+            role: Stored as the chat's role.
+            key: Stored as the chat's unique key (`_key`).
+            project: Project associated with the chat.
+            collection: Collection associated with the chat.
+            **kwargs: Passed on to the base class initializer; "name" and
+                "chat_history" are also read from it.
+        """
        super().__init__(username=username, **kwargs)
        self.name = kwargs.get("name", None)
        self.chat_history = kwargs.get("chat_history", [])
+        self.role = role
+        # "project" and "collection" are named parameters, so they can never
+        # appear in **kwargs; use the parameters directly.
+        self.project = project
+        self.collection = collection
+        self._key = key

    def add_message(self, role, content):
        if isinstance(content, str):
@ -71,6 +126,21 @@ class Chat(StreamlitBaseClass):
        self.name = name
        return name

+    def show_title(self, title=None):
+        """
+        Display the chat heading in the Streamlit application.
+
+        Args:
+            title (str, optional): Heading text. When it is empty, the chat's
+                project is used, then its collection, then "No title".
+        """
+        # The first truthy candidate wins: title, project, collection.
+        title = title or self.project or self.collection or "No title"
+        st.markdown(
+            f"""### Chat about *{title.strip()}* with *{self.role}*""",
+        )
+
+
    @classmethod
    def from_dict(cls, data):
        return cls(
@ -96,6 +166,27 @@ class Chat(StreamlitBaseClass):


 class StreamlitChat(Chat):
+    '''
+    A class to manage chat interactions within a Streamlit application.
+
+    Inherits from the Chat class and provides additional functionality to handle
+    chat history, user roles, and avatars within a Streamlit app context.
+
+    Attributes:
+        project (str): The project associated with the chat.
+        collection (str): The collection associated with the chat.
+        message_attachments (None): Placeholder for message attachments.
+        last_updated (str): Timestamp of the last update in ISO format.
+        _key (str): Unique identifier for the chat.
+        role (str): The role of the user in the chat.
+        username (str): The username of the user in the chat.
+        name (str): The name of the chat.
+        chat_history (list): List of messages in the chat history.
+
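+    Methods:
+        show_chat_history():
+            Displays the user and assistant messages of the chat history, each with an avatar.
+        get_avatar(message: dict = None, role=None) -> str:
+            Retrieves the avatar image path based on the message or role provided.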
+        '''
    def __init__(self, username: str, role: str, _key: str = None, **kwargs):
        super().__init__(username, role, _key, **kwargs)
        self.project = kwargs.get("project", None)
@ -123,6 +214,15 @@ class StreamlitChat(Chat):
            )["_key"]

    def show_chat_history(self):
+        """
+        Displays the chat history in the Streamlit app.
+
+        Iterates through the chat history and displays messages from the user and assistant.
+        Messages from other roles are ignored. Each message is displayed with an avatar.
+
+        Returns:
+            None
+        """
        for message in self.chat_history:
            if message["role"] not in ["user", "assistant"]:
                continue
@ -132,6 +232,19 @@ class StreamlitChat(Chat):
                    st.markdown(message["content"].strip('"'))

    def get_avatar(self, message: dict = None, role=None) -> str:
+        """
+        Retrieves the avatar image path based on the message or role provided.
+
+        Args:
+            message (dict, optional): A dictionary containing message details, including the role.
+            role (str, optional): The role of the user if the message is not provided.
+
+        Returns:
+            str: The file path to the avatar image.
+
+        Raises:
+            AssertionError: If neither message nor role is provided.
+        """
        assert message or role, "Either message or role must be provided"
        if message and message.get("role", None) == "user" or role == "user":
            avatar = st.session_state["settings"].get("avatar", "user")
@ -153,7 +266,38 @@ class StreamlitChat(Chat):
            avatar = None
        return avatar

+
 class Bot(BaseClass):
+    '''
+    A chatbot class that integrates with research tools and document retrieval systems.
+    The Bot class provides an interface for conversational AI that can access and process
+    various document sources, including scientific articles, user notes, and other documents.
+    It initializes multiple specialized language models for different tasks, including
+    regular conversation, query generation, and tool selection.
+    Attributes:
+        username (str): The username associated with this bot instance.
+        chat (Chat): Chat instance for managing conversation history.
+        project (Project, optional): Associated project for document context.
+        collection (list, optional): Collections of documents to search within.
+        arango_ids (list): List of document IDs in ArangoDB.
+        chatbot (LLM): Main language bot for conversation.
+        helperbot (LLM): Bot for generating queries.
+        toolbot (LLM): Bot for selecting appropriate tools.
+        tools (list): List of tool functions available to the bot.
+    Methods:
+        initiate_bots(): Initialize the different language model instances.
+        get_chunks(): Retrieve relevant text chunks based on user input.
+        answer_tool_call(): Process and execute tool calls from the AI.
+        generate_from_notes(): Generate a response from user notes.
+        generate_from_chunks(): Generate a response from document chunks.
+        run(): Run the bot (implemented by subclasses).
+        get_notes(): Retrieve notes from the database.
+        fetch_science_articles_tool(): Retrieve scientific articles.
+        fetch_other_documents_tool(): Retrieve non-scientific documents.
+        fetch_science_articles_and_other_documents_tool(): Retrieve both document types.
+        fetch_notes_tool(): Retrieve user notes.
+        conversational_response_tool(): Generate a simple conversational response.
+    '''
    def __init__(self, username: str, chat: Chat = None, tools: list = None, **kwargs):
        super().__init__(username=username, **kwargs)
        # Use the passed in chat or create a new Chat
@ -175,7 +319,6 @@ class Bot(BaseClass):
        self.helperbot = None
        self.toolbot = None

-        self.initiate_bots()
        if self.collection:
            for c in self.collection:
                for _id in self.user_arango.db.aql.execute(
@ -189,21 +332,24 @@ class Bot(BaseClass):
                ):
                    self.arango_ids.append(_id)

-        # Map tool names to functions
-        tool_mapping = {
-            "fetch_other_documents_tool": self.fetch_other_documents_tool,
-            "fetch_science_articles_tool": self.fetch_science_articles_tool,
-            "fetch_science_articles_and_other_documents_tool": self.fetch_science_articles_and_other_documents_tool,
-            "fetch_notes_tool": self.fetch_notes_tool,
-            "conversational_response_tool": self.conversational_response_tool,
-        }

        # Convert tool names to function references
        if tools:
-            self.tools = [tool_mapping[tool] if isinstance(tool, str) else tool for tool in tools]
+            # Map tool names to functions
+            tool_mapping = {
+                "fetch_other_documents_tool": self.fetch_other_documents_tool,
+                "fetch_science_articles_tool": self.fetch_science_articles_tool,
+                "fetch_science_articles_and_other_documents_tool": self.fetch_science_articles_and_other_documents_tool,
+                "fetch_notes_tool": self.fetch_notes_tool,
+                "conversational_response_tool": self.conversational_response_tool,
+            }
+            self.tools = [
+                tool_mapping[tool] if isinstance(tool, str) else tool for tool in tools
+            ]
        else:
            self.tools = None

+        self.initiate_bots()
        # Store other kwargs
        for arg in kwargs:
            setattr(self, arg, kwargs[arg])
@ -215,6 +361,23 @@ class Bot(BaseClass):
        #     print_red(f"Error initiating bots: {e}")

    def initiate_bots(self):
+        """
+        Initialize the different bot instances used in the chatbot application.
+        
+        Creates three types of bots:
+        1. chatbot: A standard LLM for normal conversation with the user
+        2. helperbot: A specialized LLM with low temperature for generating concise queries or prompts
+        3. toolbot: A specialized LLM for selecting which tool to use when responding to user queries
+           (only created if tools are provided)
+        
+        The toolbot is configured to prefer specialized tools over conversational responses
+        when the user is seeking information rather than engaging in small talk.
+        
+        Note:
+            - The chatbot uses the full chat history
+            - The helperbot uses a limited chat history (last 4 messages) with system message removed
+            - The toolbot uses a system message that lists all available tools
+        """
        # A standard LLM for normal chat
        self.chatbot = LLM(messages=self.chat.chat_history2bot())
        # A helper bot for generating queries or short prompts
@ -226,18 +389,24 @@ class Bot(BaseClass):
            messages=self.chat.chat_history2bot(n_messages=4, remove_system=True),
        )
        # A specialized LLM picking which tool to use
-        self.toolbot = LLM(
-            temperature=0,
-            system_message="""
-            You are an helpful assistant with some tools.
-            Your task is to choose one or multiple tools to answering a user's query.
-            ALWAYS choose one or more of the provided tools.
-            DON'T come up with your own tools, only use the ones provided.
-            """,
-            # system_message='Use one of the provided tools to help the answering bot to answer the user. Do not answer directly. Use the "tool_calls" field in your answer.',
-            chat=False,
-            model="tools"
-        )
+        if self.tools:
+            tools_names = [tool.__name__ for tool in self.tools]
+            tools_name_string = "\n– ".join(tools_names)
+            self.toolbot = LLM(
+                temperature=0,
+                system_message=f"""
+                You are an helpful assistant with tools. The tools you can choose from are:
+                {tools_name_string}
+                Your task is to choose one or multiple tools to answering a user's query.
+                DON'T come up with your own tools, only use the ones provided.
+                """,
+                # system_message='Use one of the provided tools to help the answering bot to answer the user. Do not answer directly. Use the "tool_calls" field in your answer.',
+                chat=False,
+                model="tools",
+            )
+            if len(tools_names) > 1 and "conversational_response_tool" in tools_names:
+                self.toolbot.system_message += "\n\nMake sure to only use the conversational response tool if the user is engaging in small talk. If the user is asking a question or looking for information, make sure to use one of the other tools!"
+

    def get_chunks(
        self,
@ -247,31 +416,78 @@ class Bot(BaseClass):
        n_sources=4,
        filter=True,
    ):
-        # Basic version without Streamlit calls
-        query = self.helperbot.generate(
-            get_generate_vector_query_prompt(user_input, self.chat.role)
-        ).content.strip('"')
+        """
+        Retrieves relevant text chunks from the vector database based on user input.
+        
+        This method:
+        1. Generates a vector query based on user input using the helper bot
+        2. Searches multiple collections in the vector database
+        3. Combines results and sorts them by relevance
+        4. Limits results to the specified number of unique sources
+        5. Cleans the text by removing footnote references
+        6. Enriches the chunks with detailed metadata from ArangoDB
+        7. Groups chunks by article title
+        
+        Parameters:
+        -----------
+        user_input : str
+            The user query to search for relevant documents
+        collections : list, optional
+            List of collection names to search in (default: ["sci_articles", "other_documents"])
+        n_results : int, optional
+            Maximum number of results to return (default: 7)
+        n_sources : int, optional
+            Maximum number of unique document sources to include (default: 4)
+        filter : bool, optional
+            Whether to filter results by ArangoDB IDs (default: True)
+            
+        Returns:
+        --------
+        dict
+            A dictionary of grouped chunks where:
+            - Keys are article titles
+            - Values are dictionaries containing:
+                - 'article_number': A sequential number for the article
+                - 'chunks': A list of chunk dictionaries, each containing:
+                    - 'document': The document text
+                    - 'metadata': The document metadata
+                    - 'distance': The similarity distance (lower is better)
+                    - 'article_number': The sequential number of the article
+        """
+
+        response = self.helperbot.generate(
+            get_generate_vector_query_prompt(user_input, self.chat.role),
+            format=QueryResponse.model_json_schema(),
+        )
+        print(response)
+        print_yellow("RESPONSE:", response.content)
+        query_response = QueryResponse.model_validate_json(response.content)
+        query = query_response.query_to_vector_database
+        print_purple(f"Query for vector DB:\n {query}")

        combined_chunks = []
        if collections:
            for collection in collections:
-                where_filter = {"_id": {"$in": self.arango_ids}} if filter else {}
-                chunks = self.get_chromadb().query(
-                    query=query,
-                    collection=collection,
-                    n_results=n_results,
-                    n_sources=n_sources,
-                    where=where_filter,
-                    max_retries=3,
-                )
-                for doc, meta, dist in zip(
-                    chunks["documents"][0],
-                    chunks["metadatas"][0],
-                    chunks["distances"][0],
-                ):
-                    combined_chunks.append(
-                        {"document": doc, "metadata": meta, "distance": dist}
+
+                if filter:
+                    where_filter = {"_id": {"$in": self.arango_ids}}
+                    chunks = self.get_chromadb().query(
+                        query=query,
+                        collection=collection,
+                        n_results=n_results,
+                        n_sources=n_sources,
+                        where=where_filter,
+                        max_retries=3,
                    )
+                    for doc, meta, dist in zip(
+                        chunks["documents"][0],
+                        chunks["metadatas"][0],
+                        chunks["distances"][0],
+                    ):
+                        combined_chunks.append(
+                            {"document": doc, "metadata": meta, "distance": dist}
+                        )
+                
        combined_chunks.sort(key=lambda x: x["distance"])

        # Keep the best chunks according to n_sources
@ -288,7 +504,11 @@ class Bot(BaseClass):
            remaining_chunks = [c for c in combined_chunks if c not in closest_chunks]
            closest_chunks.extend(remaining_chunks[: n_results - len(closest_chunks)])

-        # Now fetch real metadata from Arango
+        # Remove footnote references like [\d+] from the text chunks
+        for chunk in closest_chunks:
+            chunk["document"] = re.sub(r"\[\d+\]", "", chunk["document"])
+
+        # Fetch real metadata from Arango
        for chunk in closest_chunks:
            _id = chunk["metadata"].get("_id")
            if not _id:
@ -317,9 +537,38 @@ class Bot(BaseClass):
                }
                article_number += 1
            grouped_chunks[title]["chunks"].append(chunk)
+
        return grouped_chunks

    def answer_tool_call(self, response, user_input):
+        """
+        Process tool calls returned by the AI and execute the corresponding functions.
+        
+        This method evaluates tool calls in the AI response, executes the appropriate
+        functions with the provided arguments, and collects the resulting responses.
+        
+        Parameters:
+        -----------
+        response : dict
+            The AI response containing potential tool_calls to be executed
+        user_input : str
+            The original user query that will be passed to tool functions
+            
+        Returns:
+        --------
+        list
+            A list of string responses generated from executing the tool calls.
+            Returns an empty string if no tool calls are present.
+            
+        Notes:
+        ------
+        Supported tool functions include:
+        - fetch_other_documents_tool: Retrieves non-scientific documents
+        - fetch_science_articles_tool: Retrieves scientific articles
+        - fetch_science_articles_and_other_documents_tool: Retrieves both types of documents
+        - fetch_notes_tool: Retrieves user notes
+        - conversational_response_tool: Generates a conversational response
+        """
        bot_responses = []
        # This method returns / stores responses (no Streamlit calls)
        if not response.get("tool_calls"):
@ -331,28 +580,22 @@ class Bot(BaseClass):
            arguments["query"] = user_input

            if hasattr(self, function_name):
+                print_purple("Function name:", function_name)
                if function_name in [
                    "fetch_other_documents_tool",
                    "fetch_science_articles_tool",
                    "fetch_science_articles_and_other_documents_tool",
                ]:
                    chunks = getattr(self, function_name)(**arguments)
-                    bot_responses.append(
-                        self.generate_from_chunks(user_input, chunks)
-                    )
+                    bot_responses.append(self.generate_from_chunks(user_input, chunks))
                elif function_name == "fetch_notes_tool":
                    notes = getattr(self, function_name)()
-                    bot_responses.append(
-                        self.generate_from_notes(user_input, notes)
-                    )
+                    bot_responses.append(self.generate_from_notes(user_input, notes))
                elif function_name == "conversational_response_tool":
                    response: OllamaMessage = getattr(self, function_name)(user_input)
-                    print_green('Conversation response:', response)
-                    bot_responses.append(
-                        response.content.strip('"')
-                    )
-        print_rainbow(i for i in bot_responses)
-        return "\n\n".join(i for i in bot_responses)
+                    print_green("Conversation response:", response)
+                    bot_responses.append(response.content.strip('"'))
+        return bot_responses

    # def process_user_input(self, user_input, content_attachment=None):
    #     # Add user message
@ -382,6 +625,29 @@ class Bot(BaseClass):
    #     return bot_response

    def generate_from_notes(self, user_input, notes):
+        """
+        Generate a response based on user input and a collection of notes.
+        
+        This method takes a user query and relevant notes, formats the notes into a string,
+        creates a prompt with the formatted notes and user input, and generates a streamed response.
+        
+        Parameters
+        ----------
+        user_input : str
+            The user's query or message to respond to
+        notes : list of dict
+            A list of note dictionaries, where each note has 'title' and 'content' keys
+            
+        Returns
+        -------
+        generator
+            A generator that streams the AI-generated response
+            
+        Notes
+        -----
+        This method does not make any Streamlit calls and is safe to use outside of the Streamlit context.
+        The notes are formatted with titles and content separated by horizontal rules.
+        """
        # No Streamlit calls
        notes_string = ""
        for note in notes:
@ -394,6 +660,47 @@ class Bot(BaseClass):
        return self.chatbot.generate(prompt, stream=True)

    def generate_from_chunks(self, user_input, chunks):
+        """
+        Generate a response based on user input and retrieved document chunks.
+        
+        This method formats the retrieved document chunks into a structured string,
+        combines it with the user's input in a prompt, and generates a streaming
+        response using the chatbot.
+        
+        Parameters:
+        -----------
+        user_input : str
+            The user's query or message to respond to.
+        chunks : dict
+            A dictionary containing document chunks organized by title.
+            Expected structure:
+            {
+                "title1": {
+                    "chunks": [
+                        {
+                            "document": "content...",
+                            "metadata": {
+                                "user_notes": "optional notes..."
+                            }
+                        },
+                        ...
+                    ],
+                    "article_number": int
+                },
+                ...
+            }
+        
+        Returns:
+        --------
+        generator
+            A streaming generator of the chatbot's response.
+        
+        Notes:
+        ------
+        - This method does not make any Streamlit API calls.
+        - User notes are included in the formatted content if available.
+        - The formatted content includes titles, article numbers, and document text.
+        """
        # No Streamlit calls
        chunks_string = ""
        for title, group in chunks.items():
@ -419,7 +726,7 @@ class Bot(BaseClass):
        )
        return list(notes)

-    def fetch_science_articles_tool(self, query: str, n_documents: int):
+    def fetch_science_articles_tool(self, query: str, n_documents: int = 6):
        """
        "Fetches information from scientific articles. Use this tool when the user is looking for information from scientific articles."

@ -441,7 +748,7 @@ class Bot(BaseClass):
            query, collections=["sci_articles"], n_results=n_documents
        )

-    def fetch_other_documents_tool(self, query: str, n_documents: int):
+    def fetch_other_documents_tool(self, query: str, n_documents: int = 6):
        """
        Fetches information from other documents based on the user's query.

@ -507,10 +814,8 @@ class Bot(BaseClass):
        """
        Generate a conversational response to a user's query.

-        This method is designed to provide a short and conversational response
-        without fetching additional data. It should be used ONLY when it is clear
-        that the user is engaging in small talk (like saying 'hi') and not seeking detailed information.
-        If the user is asking for informaiton or a qualified answer, don't use this tool!
+        This method is designed to provide a short and conversational response without fetching additional data.
+        It should be used ONLY when it is clear that the user is engaging in small talk (like saying 'hi').

        Args:
            query (str): The user's message to which the bot should respond.
@ -521,7 +826,6 @@ class Bot(BaseClass):
        query = f"""
        User message: "{query}". 
        Make your answer short and conversational. 
-        This is perhaps not a conversation about a journalistic project, so try not to be too informative.
        Don't answer with anything you're not sure of! 
        """

@ -543,12 +847,20 @@ class StreamlitBot(Bot):
            st.session_state["llm_chosen_backend"] = self.chatbot.chosen_backend

        settings = self.get_settings()
-        print("SETTINGS:", settings)
        if settings.get("use_reasoning_model", False):
            self.chatbot.model = self.chatbot.get_model("reasoning")

+        print_rainbow(settings)
+        print('MODEL', self.chatbot.model)
+
    def run(self):
        # Example Streamlit run loop
+        title = (
+            self.project.name
+            if self.project
+            else self.collection.name if self.collection else None
+        )
+        self.chat.show_title(title=title)
        self.chat.show_chat_history()
        if user_input := st.chat_input("Write your message here...", accept_file=True):
            text_input = user_input.text.replace('"""', "---")
@ -597,18 +909,24 @@ class StreamlitBot(Bot):
    def process_user_input(self, user_input, content_attachment=None):
        # We override to show messages in Streamlit instead of just storing
        self.chat.add_message("user", user_input)
+
+        # Remove conversational response tool if there are more than 2 messages
+        if len(self.chat.chat_history) > 2 and len(self.tools) > 1:
+            for tool in self.tools:
+                if tool.__name__ == "conversational_response_tool":
+                    self.tools.remove(tool)
+                    break
        if not content_attachment:
            prompt = get_tools_prompt(user_input)
-            print_rainbow(self.toolbot.__dict__)
            response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
            if response.get("tool_calls"):
                bot_response = self.answer_tool_call(response, user_input)
            else:
                bot_response = response.content.strip('"')
-            with st.chat_message(
-                "assistant", avatar=self.chat.get_avatar(role="assistant")
-            ):
-                st.write(bot_response)
+            # with st.chat_message(
+            #     "assistant", avatar=self.chat.get_avatar(role="assistant")
+            # ):
+            #     st.write(bot_response)
        else:
            with st.chat_message(
                "assistant", avatar=self.chat.get_avatar(role="assistant")
@ -644,67 +962,79 @@ class StreamlitBot(Bot):

        self.chat.update_in_arango()

-    # def answer_tool_call(self, response, user_input): #! This should be in the Basse ChatBot?
-    #     bot_responses = []
-    #     for tool in response.get("tool_calls", []):
-    #         function_name = tool.function.get("name")
-    #         arguments = tool.function.arguments
-    #         arguments["query"] = user_input
-
-    #         print("Function name:", function_name)
-    #         with st.chat_message(
-    #             "assistant", avatar=self.chat.get_avatar(role="assistant")
-    #         ):
-    #             if function_name in [
-    #                 "fetch_other_documents_tool",
-    #                 "fetch_science_articles_tool",
-    #                 "fetch_science_articles_and_other_documents_tool",
-    #             ]:
-    #                 chunks = getattr(self, function_name)(**arguments)
-    #                 response_text = self.generate_from_chunks(user_input, chunks)
-    #                 # Separate thinking chunk and normal chunk
-    #                 print_red("Model:", self.chatbot.model)
-
-    #                 if self.chatbot.model == "reasoning":
-    #                     bot_response = self.write_reasoning(response_text)
-
-    #                 else:
-    #                     bot_response = st.write_normal(response_text)
-    #                 bot_responses.append(bot_response)
-
-    #                 if chunks:
-    #                     sources = "###### Sources:\n"
-    #                     for title, group in chunks.items():
-    #                         j = group["chunks"][0]["metadata"].get(
-    #                             "journal", "No Journal"
-    #                         )
-    #                         d = group["chunks"][0]["metadata"].get(
-    #                             "published_date", "No Date"
-    #                         )
-    #                         sources += f"[{group['article_number']}] **{title}** :gray[*{j}* ({d})]  \n"
-    #                     st.markdown(sources)
-    #                     bot_response += f"\n\n{sources}"
-    #                 bot_responses.append(bot_response)
-
-    #             elif function_name == "fetch_notes_tool":
-    #                 notes = getattr(self, function_name)()
-    #                 response_text = self.generate_from_notes(user_input, notes)
-    #                 bot_responses.append(st.write_stream(response_text).strip('"'))
-
-    #             elif function_name == "conversational_response_tool":
-    #                 response_text = getattr(self, function_name)(user_input)
-    #                 print(
-    #                     "###",
-    #                     self.chatbot.call_model,
-    #                     self.chatbot.get_model("reasoning"),
-    #                 )
-    #                 if self.chatbot.call_model == self.chatbot.get_model("reasoning"):
-    #                     print_blue("REASONING MODEL!")
-    #                     bot_response = self.write_reasoning(response_text).strip('"')
-    #                 else:
-    #                     bot_responses.append(st.write_stream(response_text))
-
-    #     return "\n\n".join(bot_responses)
+    def answer_tool_call(self, response, user_input):
+        """
+        Run the tool calls in `response` and stream each answer into the app.
+
+        Each executed call gets its own assistant chat message. If there are
+        several tool calls, `conversational_response_tool` calls are skipped.
+
+        Args:
+            response: Tool-selection reply; its "tool_calls" entries are run.
+            user_input (str): The user's message, passed to each tool as `query`.
+
+        Returns:
+            str: The text written for each tool call, joined by blank lines.
+        """
+        #! This should be in the Base ChatBot?
+        bot_responses = []
+        tools_response = response.get("tool_calls", [])
+        multiple_calls = len(tools_response) > 1
+        for tool in tools_response:
+            function_name = tool.function.get("name")
+            # Don't use conversational response tool if there are other tools
+            if multiple_calls and function_name == "conversational_response_tool":
+                continue
+            arguments = tool.function.arguments
+            arguments["query"] = user_input
+            print("Function name:", function_name)
+            with st.chat_message(
+                "assistant", avatar=self.chat.get_avatar(role="assistant")
+            ):
+                if function_name in [
+                    "fetch_other_documents_tool",
+                    "fetch_science_articles_tool",
+                    "fetch_science_articles_and_other_documents_tool",
+                ]:
+                    chunks = getattr(self, function_name)(**arguments)
+                    response_text = self.generate_from_chunks(user_input, chunks)
+                    # Separate thinking chunk and normal chunk
+                    print_red("Model:", self.chatbot.model)
+                    if self.chatbot.model == "reasoning":
+                        bot_response = self.write_reasoning(response_text)
+                    else:
+                        bot_response = self.write_normal(response_text)
+                    if chunks:
+                        sources = "###### Sources:\n"
+                        for title, group in chunks.items():
+                            metadata = group["chunks"][0]["metadata"]
+                            j = metadata.get("journal", "No Journal")
+                            d = metadata.get("published_date", "No Date")
+                            sources += f"[{group['article_number']}] **{title}** :gray[*{j}* ({d})]  \n"
+                        st.markdown(sources)
+                        bot_response += f"\n\n{sources}"
+                    # Record the answer once, after the sources were appended.
+                    bot_responses.append(bot_response)
+                elif function_name == "fetch_notes_tool":
+                    notes = getattr(self, function_name)()
+                    response_text = self.generate_from_notes(user_input, notes)
+                    bot_responses.append(st.write_stream(response_text).strip('"'))
+                elif function_name == "conversational_response_tool":
+                    response_text = getattr(self, function_name)(user_input)
+                    print("###", response_text)
+                    if self.chatbot.call_model == self.chatbot.get_model("reasoning"):
+                        print_blue("REASONING MODEL!")
+                        bot_response = self.write_reasoning(response_text).strip('"')
+                    else:
+                        if isinstance(response_text, OllamaMessage):
+                            response_text = response_text.content
+                        elif isinstance(response_text, dict):
+                            response_text = response_text.get("content", "")
+                        bot_response = self.write_normal(response_text).strip('"')
+                    # Keep the conversational answer in the returned text too.
+                    bot_responses.append(bot_response)
+
+        return "\n\n".join(bot_responses)

    def write_reasoning(self, response_text):
        chunks_iter = iter(response_text)  # convert generator to iterator
@ -729,6 +1059,7 @@ class StreamlitBot(Bot):
            return bot_response

        else:
+
            def full_gen():
                if first_mode:
                    yield (first_mode, first_text)
@ -740,17 +1071,13 @@ class StreamlitBot(Bot):
    def write_normal(self, response_text):
        chunks_iter = iter(response_text)  # convert generator to iterator

-        try:
-            first_mode, first_text = next(chunks_iter)  # get first chunk
-        except StopIteration:
-            # no chunks at all
-            first_mode, first_text = None, None
-
        def full_gen():
-            if first_mode:
-                yield (first_mode, first_text)
-            for mode, text in chunks_iter:
-                yield (mode, text)
+            for chunk in chunks_iter:
+                if isinstance(chunk, tuple) and len(chunk) == 2:
+                    _, text = chunk
+                    yield text
+                else:
+                    yield chunk

        bot_response = st.write_stream(full_gen()).strip('"')
        return bot_response
@ -816,6 +1143,7 @@ class ResearchAssistantBot(StreamlitBot):
        self.tools = [
            self.fetch_science_articles_tool,
            self.fetch_science_articles_and_other_documents_tool,
+            self.conversational_response_tool,
        ]


@ -954,7 +1282,7 @@ class HostBot(StreamlitBot):
            Often "conversational_response_tool" is enough, but sometimes project notes are needed. 
            Make sure to read the description of the tools carefully!""",
            chat=False,
-            model="tools"
+            model="tools",
        )

    def generate(self, query):
@ -986,7 +1314,7 @@ class GuestBot(StreamlitBot):
            temperature=0,
            system_message=f"You are an assistant to an expert on {subject}. Choose one or many tools to use in order to assist the expert in answering questions. Make sure to read the description of the tools carefully.",
            chat=False,
-            model="tools"
+            model="tools",
        )

    def generate(self, query):
--- a/streamlit_pages.py
+++ b/streamlit_pages.py
@ -1,37 +1,59 @@
 import streamlit as st
 from time import sleep
 from colorprinter.print_color import *
-from _classes import BotChatPage
+
+
 def Projects():
    """
    Function to handle the Projects page.
    """
    from projects_page import ProjectsPage
-    if 'Projects' not in st.session_state:
-        st.session_state['Projects'] = {}
+
+    if "Projects" not in st.session_state:
+        st.session_state["Projects"] = {}
    projectpage = ProjectsPage(username=st.session_state["username"])
    projectpage.run()

+
 def Bot_Chat():
    """
    Function to handle the Chat Bot page.
    """
+    from chat_page import BotChatPage
+
    print_blue("Bot Chat")
-    
+
    sleep(0.1)
-    if 'Bot Chat' not in st.session_state:
-        st.session_state['Bot Chat'] = {}
+    if "Bot Chat" not in st.session_state:
+        st.session_state["Bot Chat"] = {}
    chatpage = BotChatPage(username=st.session_state["username"])
    chatpage.run()

+
+def Research():
+    """
+    Function to handle the Deep Research page.
+
+    Makes sure a "Research" entry exists in ``st.session_state``, then builds
+    a ``ResearchPage`` for the user stored under ``st.session_state["username"]``
+    and runs it.
+    """
+    # Imported here, like the other page handlers in this file do.
+    from research_page import ResearchPage
+
+    page_key = "Research"
+    print_blue(page_key)
+
+    sleep(0.1)
+    if page_key not in st.session_state:
+        st.session_state[page_key] = {}
+    ResearchPage(username=st.session_state["username"]).run()
+
+
 def Article_Collections():
    """
    Function to handle the Article Collections page.
    """
    from collections_page import ArticleCollectionsPage
+
    sleep(0.1)
-    if 'Article Collections' not in st.session_state:
-        st.session_state['Article Collections'] = {}
+    if "Article Collections" not in st.session_state:
+        st.session_state["Article Collections"] = {}

    article_collection = ArticleCollectionsPage(username=st.session_state["username"])
    article_collection.run()
@ -41,7 +63,8 @@ def Settings():
    """
    Function to handle the Settings page.
    """
-    from _classes import SettingsPage
+    from settings_page import SettingsPage
+
    settings = SettingsPage(username=st.session_state["username"])
    sleep(0.1)
    settings.run()
@ -51,10 +74,11 @@ def RSS_Feeds():
    """
    Function to handle the RSS Feeds page.
    """
-    from _classes import RSSFeedsPage
-    if 'RSS Feeds' not in st.session_state:
-        st.session_state['RSS Feeds'] = {}
+    from feed_page import RSSFeedsPage
+
+    if "RSS Feeds" not in st.session_state:
+        st.session_state["RSS Feeds"] = {}

    rss_feeds_page = RSSFeedsPage(username=st.session_state["username"])
    sleep(0.1)
-    rss_feeds_page.run()
+    rss_feeds_page.run()
--- a/test_research.py
+++ b/test_research.py
@ -0,0 +1,206 @@
+from _llm import LLM
+from _arango import ArangoDB
+from _chromadb import ChromaDB
+from streamlit_chatbot import Bot
+from pydantic import BaseModel, Field
+from typing import Dict, List, Tuple
+from colorprinter.print_color import *
+from projects_page import Project
+from _base_class import StreamlitBaseClass
+from prompts import get_tools_prompt    
+
+class ResearchBase(Bot):
+    """Shared base for the research bots defined in this file.
+
+    In addition to whatever ``Bot`` sets up, each instance gets its own
+    ``LLM``, ``ArangoDB`` and ``ChromaDB`` objects and a ``messages`` list.
+    """
+
+    def __init__(self, username, **args):
+        """Initialise the parent ``Bot`` and attach fresh service objects.
+
+        Args:
+            username: Passed to ``Bot.__init__`` as the ``username`` keyword.
+            **args: Further keyword arguments, forwarded unchanged to ``Bot``.
+        """
+        super().__init__(username=username, **args)
+        self.llm = LLM()
+        self.arango = ArangoDB()
+        self.chromadb = ChromaDB()
+        self.messages = []
+
+    def start(self):
+        """Reset ``messages`` to one system entry and hand the model name to the LLM.
+
+        ``get_model`` is only called when ``self.llm.model`` is one of the
+        short names listed below.
+        """
+        # NOTE(review): the entry uses the key "message", not "content" —
+        # confirm this is what the consumer of ``messages`` expects.
+        system_entry = {"role": "system", "message": self.llm.system_message}
+        self.messages = [system_entry]
+
+        known_names = ("small", "standard", "vision", "reasoning", "tools")
+        if self.llm.model not in known_names:
+            return
+        # NOTE(review): the return value is discarded; presumably get_model
+        # updates the LLM in place — confirm against _llm.LLM.
+        self.llm.get_model(self.llm.model)
+
+
+class ResearchManager(ResearchBase):
+    """Bot that drafts a plan for how to answer a journalist's question.
+
+    Uses the "reasoning" model and a system message describing the assistant
+    role. The plan is returned as free text; it is not executed here.
+    """
+
+    def __init__(self, username, project=None):
+        """Set up the manager.
+
+        Args:
+            username: Forwarded to ``ResearchBase``.
+            project: Optional project object. When it exposes a
+                ``notes_summary`` attribute, that summary is added to the
+                planning prompt.
+        """
+        super().__init__(username=username, project=project)
+        # Keep a reference on the instance so generate_plan never depends on
+        # a module-level ``project`` variable happening to exist.
+        if getattr(self, "project", None) is None:
+            self.project = project
+        self.llm.system_message = "You are an assistant helping a journalist writing a report based on extensive research."
+        self.llm.model = "reasoning"
+        self.start()
+
+    def generate_plan(self, question):
+        """Ask the LLM for a step-by-step plan for answering *question*.
+
+        Args:
+            question: The question the final report should answer.
+
+        Returns:
+            The ``content`` attribute of the LLM response.
+        """
+        query = f"""
+        A journalist wants to get a report that answers this question: "{question}"
+        THIS IS *NOT* A QUESTION YOU CAN ANSWER! Instead, you need to make a plan for how to answer this question.
+        Include what type of information you need from what available sources.
+        Available sources are:
+        - Scientific articles
+        - Other articles the journalists has gathered, such as blog posts, news articles, etc.
+        - The journalists own notes.
+        - Transcribed interviews (already done, you can't produce new ones).
+        All of the above sources are available in a database, but you need to specify what you need. Be as precise as possible.
+        As you don't have access to the sources, include steps to retrieve excerpts from articles and retrieve those that might be interesting.
+        Also include steps to verify the information.
+        Make the plan easy to follow and structured. 
+        Remember: You are not answering the question, you are making *a plan* for how to answer the question using the available sources.
+        """
+        # Only mention the notes when there actually is a summary; otherwise
+        # the prompt would contain the literal text "None".
+        notes_summary = getattr(getattr(self, "project", None), "notes_summary", None)
+        if notes_summary:
+            query += f"\nTo help you understand the subject, here is a summary of notes the journalist has done: {notes_summary}"
+        # Leading newline keeps the formatting instructions from running
+        # straight on from the notes summary.
+        query += """\nPlease structure the plan like:
+        ## Step 1:
+        - Task1: Description of task
+        - Task2: Description of task
+        ## Step 2:
+        - Task1: Description of task
+        - Task2: Description of task
+        Etc, with as many steps and tasks as needed.
+        """
+        return self.llm.generate(query).content
+
+
+class ResearchAssistant(ResearchBase):
+    """Research bot whose LLM gets the system message "You are a Research Assistant"."""
+
+    def __init__(self, username):
+        """Create the bot for *username*, set the system message and call ``start``."""
+        super().__init__(username=username)
+        system_message = "You are a Research Assistant"
+        self.llm.system_message = system_message
+        self.start()
+
+
+class HelperBot(ResearchBase):
+    """Bot that turns a free-text plan into structured data using the "small" model."""
+
+    def __init__(self, username):
+        """Create the bot for *username*, set its system message and model, then call ``start``."""
+        super().__init__(username)
+        self.llm.system_message = "You are helping a researcher to structure a text. You will get a text and make it into structured data. Make sure not to change the meaning of the text and keeps all the details in the subtasks."
+        self.llm.model = "small"
+        self.start()
+
+    def make_structured_plan(self, text, question=None):
+        """Convert a free-text plan into a validated ``Plan`` object.
+
+        Args:
+            text: The plan as prose/markdown.
+            question: Optional question the report answers; mentioned in the
+                prompt when given.
+
+        Returns:
+            A ``Plan`` instance whose ``steps`` maps each step title to a list
+            of ``(task name, task description)`` tuples.
+
+        Raises:
+            pydantic.ValidationError: If the LLM response does not match the
+                ``Plan`` schema.
+        """
+
+        class Plan(BaseModel):
+            steps: Dict[str, List[Tuple[str, str]]] = Field(
+                description="Structured plan represented as steps with their corresponding tasks or facts",
+                # pydantic v2 deprecates arbitrary extra Field kwargs such as
+                # ``example=``; json_schema_extra adds the same "example" key
+                # to the generated schema without the deprecation warning.
+                json_schema_extra={
+                    "example": {
+                        "Step 1: Gather Existing Materials": [
+                            ("Task 1", "Description of task"),
+                            ("Task 2", "Description of task"),
+                        ],
+                        "Step 2: Extract Relevant Information": [
+                            ("Task 1", "Description of task"),
+                            ("Task 2", "Description of task"),
+                        ],
+                    },
+                },
+            )
+
+        # The two prompt variants differ only in whether the question is named.
+        topic = f' on "{question}"' if question else ""
+        query = f''' This is a proposed plan for how to write a report{topic}:\n"""{text}"""\nPlease make the plan into structured data with subtasks. Make sure to keep all the details in the subtasks.'''
+        response = self.llm.generate(query, format=Plan.model_json_schema())
+        print(response)
+        structured_response = Plan.model_validate_json(response.content)
+        print('PLAN')
+        print_rainbow(structured_response)
+        print()
+        return structured_response
+
+
+class ToolBot(ResearchBase):
+    """Bot that asks a tool-calling model which tools fit a given task."""
+
+    def __init__(self, username, tools: list):
+        """Set up the bot and an LLM whose system message lists the tools.
+
+        Args:
+            username: Forwarded to ``ResearchBase``.
+            tools: Tools to offer the model. Entries may be callables (their
+                ``__name__`` is listed) or plain names given as strings.
+        """
+        super().__init__(username, tools=tools)
+        # NOTE(review): start() runs against the default LLM created by
+        # ResearchBase, which is replaced just below — confirm this order is
+        # intended.
+        self.start()
+        # Accept both callables and plain string names; a bare ``tool.__name__``
+        # raises AttributeError for strings.
+        tools_names = [getattr(tool, "__name__", str(tool)) for tool in self.tools]
+        # Prefix every entry so the first tool gets a bullet too.
+        tools_name_string = "\n".join(f"– {name}" for name in tools_names)
+        self.llm = LLM(
+            temperature=0,
+            system_message=f"""
+            You are an helpful assistant with tools. The tools you can choose from are:
+            {tools_name_string}
+            Your task is to choose one or multiple tools to answering a user's query.
+            DON'T come up with your own tools, only use the ones provided.
+            """,
+            chat=False,
+            model="tools",
+        )
+
+    def propose_tools(self, task):
+        """Ask the model which tool(s) it would use for *task*.
+
+        Args:
+            task: Description of the task to find tools for.
+
+        Returns:
+            The ``tool_calls`` attribute of the LLM response.
+        """
+        # The prompt comes from prompts.get_tools_prompt; the inline prompt
+        # that used to precede this line was overwritten before use.
+        query = get_tools_prompt(task)
+        response = self.llm.generate(query)
+        print_yellow('Model:', self.llm.model)
+        print_rainbow(response)
+        return response.tool_calls
+
+if __name__ == "__main__":
+    # Manual end-to-end run: generate a plan, structure it, then ask the
+    # tool bot which tools it would use for every task in the plan.
+
+    base = StreamlitBaseClass(username="lasse")
+    project = Project(
+        username="lasse",
+        project_name="Monarch butterflies",
+        user_arango=base.get_arango(),
+    )
+    rm = ResearchManager(username="lasse", project=project)
+    # NOTE(review): the tools are given as plain strings here — confirm that
+    # ToolBot/Bot accept names rather than callables.
+    tb = ToolBot(
+        username="lasse",
+        tools=[
+            "fetch_science_articles_tool",
+            "fetch_notes_tool",
+            "fetch_other_documents_tool",
+            "fetch_science_articles_and_other_documents_tool",
+        ]
+    )
+    # ra = ResearchAssistant(username="lasse")
+    hb = HelperBot(username="lasse")
+
+    question = "Tell me five interesting facts about the Monarch butterfly"
+
+    # Generate plan
+    plan = rm.generate_plan(question)
+# -- Example of what a plan can look like --
+# plan = """## Step-by-Step Plan for Answering the Question: "Tell Me Five Interesting Facts About the Monarch Butterfly"
+
+# ### Step 1: Gather and Organize Existing Materials
+# - **Task 1:** Retrieve all existing materials related to Monarch butterflies from the database using keywords such as "Monarch butterfly migration," "habitat loss," "milkweed," "insecticides," "climate change," "Monarch Butterfly Biosphere Reserve," and "migration patterns."
+# - **Task 2:** Categorize these materials into scientific articles, other articles (blogs, news), own notes, and transcribed interviews for easy access.
+
+# ### Step 2: Extract Relevant Excerpts
+# - **Task 1:** From the retrieved scientific articles, extract information on migration patterns, genetic studies, and population decline factors.
+# - **Task 2:** From blogs and news articles, look for interesting anecdotes or recent findings about conservation efforts and unique behaviors of Monarch butterflies.
+
+# ### Step 3: Identify Potential Interesting Facts
+# - **Task 1:** Review the extracted excerpts to identify potential facts such as migration patterns, threats faced by Monarchs, population decline statistics, conservation efforts, and unique behaviors.
+# - **Task 2:** Compile a list of five compelling and accurate facts based on the extracted information.
+
+# ### Step 4: Verify Information
+# - **Task 1:** Cross-check each fact with multiple sources to ensure accuracy. For example, verify migration details across scientific articles and recent news reports.
+# - **Task 2:** Look for consensus among sources regarding population trends and threats to Monarchs.
+
+# ### Step 5: Structure the Report
+# - **Task 1:** Organize the five selected facts into a coherent structure, ensuring each fact is clearly explained and engaging.
+# - **Task 2:** Incorporate quotes or statistics from sources to add depth and credibility to each fact.
+
+# ### Step 6: Review and Finalize
+# - **Task 1:** Proofread the report for clarity, accuracy, and grammar.
+# - **Task 2:** Ensure all information is presented in an engaging manner suitable for a journalistic report.
+
+# This plan ensures that the journalist systematically gathers, verifies, and presents five interesting facts about Monarch butterflies, providing a comprehensive and accurate report. 
+#     """
+    #print_blue(plan)
+    # Drop the model's reasoning preamble. maxsplit=1 keeps everything after
+    # the first closing tag, even if "</think>" appears again in the answer.
+    if "</think>" in plan:
+        plan = plan.split("</think>", 1)[1]
+
+    # Make structured plan
+    structured_plan = hb.make_structured_plan(plan, question)
+
+
+    for step, tasks in structured_plan.steps.items():
+        print_blue("\n### Step:", step)
+        # Each task is a (name, description) pair per the Plan schema.
+        for task_name, task_description in tasks:
+
+            print_blue("Task:", task_name)
+            print_yellow(task_description)
+
+            tools = tb.propose_tools(task_description)
+            print_green("Tools:", tools)
+            print('\n')