Add initial implementation of RSS analyzer and ArangoDB integration

1 year ago · 00fd42b32d
parent 01df43bba2
commit 00fd42b32d
19 changed files with 1474 additions and 249 deletions
--- a/.env
+++ b/.env
@ -0,0 +1,21 @@
+# Chroma
+CHROMA_CLIENT_AUTH_CREDENTIALS="overpass-alms-porker-file-seigneur-kiln"
+CHROMA_SERVER_AUTHN_PROVIDER="chromadb.auth.basic_authn.BasicAuthenticationServerProvider"
+CHROMA_AUTH_TOKEN_TRANSPORT_HEADER="X-Chroma-Token"
+CHROMA_HOST="http://192.168.1.10:8007"
+
+CHROMA_CLIENT_AUTH_CREDENTIALS="overpass-alms-porker-file-seigneur-kiln"
+CHROMA_SERVER_AUTHN_PROVIDER="chromadb.auth.basic_authn.BasicAuthenticationServerProvider"
+CHROMA_AUTH_TOKEN_TRANSPORT_HEADER="X-Chroma-Token"
+_CHROMA_HOST="https://lasseedfast.se/chroma_ev_cars/"
+
+# Arango
+ARANGO_HOST="http://192.168.1.10:8531"
+ARANGO_USER="admin"
+ARANGO_PASSWORD="raHzaw-5vyjqo-xisfec"
+ARANGO_DB="base"
+ARANGO_PWD_ENV_MANAGER="jagskoterenv(Y)"
+ARANGO_ROOT_USER='root'
+ARANGO_ROOT_PASSWORD='gyhqed-kiwNac-9buhme'
+
+MAILERSEND_API_KEY="mlsn.71de3eb2dbcb733bd4ee509d1c95ccfc8939fd647cba9e3a0f631f60f900bd85"
--- a/init.py
+++ b/init.py
@ -0,0 +1 @@
+from pdf_highlighter import Highlighter
--- a/_base_class.py
+++ b/_base_class.py
@ -13,6 +13,7 @@ class BaseClass:
        self.project_name: str = kwargs.get('project_name', None)
        self.collection: str = kwargs.get('collection_name', None)
        self.user_arango: ArangoDB = self.get_arango()
+        self.base_arango: ArangoDB = self.get_arango(admin=True)


    def get_arango(self, admin: bool = False, db_name: str = None) -> ArangoDB:
@ -128,7 +129,7 @@ class BaseClass:
            self.file_path = f"sci_articles/{self.doi}.pdf".replace("/", "_")
            return os.path.exists(self.file_path)
        else:
-            file_path = f"{self.download_folder}/{filename}"
+            file_path = f"{download_folder}/{filename}"
            while os.path.exists(file_path + ".pdf"):
                if not re.search(r"(_\d+)$", file_path):
                    file_path += "_1"
--- a/_chromadb.py
+++ b/_chromadb.py
@ -3,7 +3,6 @@ import os
 from chromadb.config import Settings
 from dotenv import load_dotenv
 from colorprinter.print_color import *
-
 load_dotenv(".env")


@ -96,12 +95,14 @@ class ChromaDB:
                    if k not in r["included"]:
                        continue
                    result[k][0] = v[0][: n_results - (n_sources - len(sources))]
-            if "_id" in where:
+            if where and "_id" in where:
                where["_id"]["$in"] = [
                    i for i in where["_id"]["$in"] if i not in sources
                ]
                if where["_id"]["$in"] == []:
                    break
+            else:
+                break
        return result


@ -109,7 +110,8 @@ if __name__ == "__main__":
    from colorprinter.print_color import *

    chroma = ChromaDB()
-
+    print(chroma.db.list_collections())
+    exit()
    result = chroma.query(
        query="What is Open Science)",
        collection="sci_articles",
@ -117,19 +119,5 @@ if __name__ == "__main__":
        n_sources=3,
        max_retries=4,
    )
-    print(result)
-    exit()
-    all = chroma_collection.get()
-
-    ids = all.get("ids", [])
-    metadatas = all.get("metadatas", [])
-
-    combined_list = list(zip(ids, metadatas))
-
-    ids = []
-    metadatas = []
-    for id, metadata in combined_list:
-        ids.append(id)
-        metadata["_id"] = f"sci_articles/{metadata['_key']}"
-        metadatas.append(metadata)
-    chroma_collection.update(ids=ids, metadatas=metadatas)
+    print_rainbow(result['metadatas'][0])
+   
--- a/_classes.py
+++ b/_classes.py
@ -1,22 +1,28 @@
 # streamlit_pages.py
-
+import os
+import feedparser
+import urllib
+from urllib.parse import urljoin
+import requests
 import re
+from bs4 import BeautifulSoup
 import streamlit as st
 from time import sleep
-import pandas as pd
 from datetime import datetime, timedelta
 from PIL import Image
 from io import BytesIO
 import base64
 from colorprinter.print_color import *
 from article2db import PDFProcessor
-from streamlit_chatbot import Chat, EditorBot, ResearchAssistantBot, PodBot
+import feedparser

+from streamlit_chatbot import Chat, EditorBot, ResearchAssistantBot, PodBot, Bot
 from info import country_emojis
 from utils import fix_key
 from _arango import ArangoDB
 from _llm import LLM
 from _base_class import BaseClass
+from _rss import RSSReader

 from prompts import get_note_summary_prompt, get_image_system_prompt

@ -399,6 +405,12 @@ class BotChatPage(BaseClass):
                "chat": self.chat,
                "role": self.role,
            }
+        else:
+            bot = Bot(
+                username=self.username,
+                chat=Chat(username=self.username, role="Research Assistant"),
+            )
+            bot.run()

    def sidebar_actions(self):
        with st.sidebar:
@ -680,7 +692,6 @@ class Project(BaseClass):

    def load_project(self):
        print_blue("Project name:", self.name)
-        print(self.user_arango, type(self.user_arango))
        project_cursor = self.user_arango.db.aql.execute(
            "FOR doc IN projects FILTER doc.name == @name RETURN doc",
            bind_vars={"name": self.name},
@ -927,3 +938,103 @@ class SettingsPage(BaseClass):
            self.update_settings("avatar", img_path)
            st.success("Profile picture uploaded")
            sleep(1)
+
+
+class RSSFeedsPage(BaseClass):
+    """Streamlit page for browsing stored RSS feeds and adding new ones."""
+
+    def __init__(self, username: str):
+        """Create the page and restore attributes saved in the session state.
+
+        Args:
+            username: Name of the user the page is rendered for.
+        """
+        super().__init__(username=username)
+        self.page_name = "RSS Feeds"
+        self.reader = RSSReader(username=username)
+        # Initialize attributes from session state if available
+        for k, v in st.session_state.get(self.page_name, {}).items():
+            setattr(self, k, v)
+
+    def run(self):
+        """Render the page: the selected feed first, then the sidebar controls."""
+        if "selected_feed" not in st.session_state:
+            st.session_state["selected_feed"] = None
+        self.update_current_page(self.page_name)
+        self.display_feed()
+        self.sidebar_actions()
+        self.update_session_state(page_name=self.page_name)
+
+    def select_rss_feeds(self):
+        """Show a sidebar selectbox of stored feeds and remember the choice.
+
+        The ``_key`` of the chosen feed is written to
+        ``st.session_state["selected_feed"]``.
+        """
+        rss_feeds = self.reader.get_rss_feeds()
+        if not rss_feeds:
+            st.write("You have no RSS feeds added.")
+            return
+
+        feed_options = [feed["title"] for feed in rss_feeds]
+        with st.sidebar:
+            st.subheader("Show your feeds")
+            selected_feed_title = st.selectbox(
+                "Select a feed", options=feed_options, index=None
+            )
+        if not selected_feed_title:
+            return
+
+        # First feed carrying the chosen title.
+        selected_key = next(
+            feed["_key"] for feed in rss_feeds if feed["title"] == selected_feed_title
+        )
+        # Rerun only when the selection actually changed. The selectbox keeps its
+        # value between runs, so an unconditional rerun would restart the script
+        # on every pass and never finish rendering.
+        if st.session_state.get("selected_feed") != selected_key:
+            st.session_state["selected_feed"] = selected_key
+            st.rerun()
+
+    def search_feeds(self, rss_url):
+        """Discover feeds at *rss_url* and store them in the session state.
+
+        Args:
+            rss_url: Website URL or feed URL entered by the user.
+
+        On success the feeds are stored in
+        ``st.session_state["discovered_feeds"]``; otherwise an error is shown
+        and the session state is left untouched.
+        """
+        with st.spinner("Discovering feeds..."):
+            feeds = self.reader.discover_feeds(rss_url)
+            if feeds:
+                st.session_state["discovered_feeds"] = feeds
+            else:
+                st.error("No RSS feeds found at the provided URL.")
+
+    def _show_entries(self, entries, with_link=False):
+        """Render up to five entries as expanders with their summaries.
+
+        Args:
+            entries: Sequence of entry mappings with a ``title`` and, optionally,
+                ``summary`` and ``link``.
+            with_link: Whether to append a "Read more" link to each entry.
+        """
+        for entry in entries[:5]:
+            with st.expander(entry["title"]):
+                summary = entry.get("summary", "No summary available")
+                st.markdown(self.reader.html_to_markdown(summary))
+                if with_link:
+                    st.markdown(f"[Read more]({entry['link']})")
+
+    def sidebar_actions(self):
+        """Render the sidebar: the feed selector and the "add a feed" workflow.
+
+        Discovered feeds are kept in ``st.session_state["discovered_feeds"]``
+        so that the preview and add buttons can use them on later runs.
+        """
+        if "discovered_feeds" not in st.session_state:
+            st.session_state["discovered_feeds"] = None
+
+        with st.sidebar:
+            self.select_rss_feeds()
+            st.subheader("Add a New RSS Feed")
+            with st.form("add_rss_feed"):
+                rss_url = st.text_input("Website URL or RSS Feed URL")
+                submitted = st.form_submit_button("Discover Feeds")
+                if submitted:
+                    print_green(rss_url)
+                    # Drop the result of any earlier search so stale feeds are
+                    # not offered when this search finds nothing.
+                    st.session_state["discovered_feeds"] = None
+                    self.search_feeds(rss_url)
+
+            if st.session_state["discovered_feeds"]:
+                st.subheader("Select a Feed to Add")
+                feeds = st.session_state["discovered_feeds"]
+                feed_options = [f"{feed['title']} ({feed['href']})" for feed in feeds]
+                selected_feed = st.selectbox("Available Feeds", options=feed_options)
+                selected_feed_url = feeds[feed_options.index(selected_feed)]["href"]
+
+                if st.button("Preview Feed"):
+                    feed = self.reader.parse_feed(selected_feed_url)
+                    st.write(f"{feed.title}")
+                    description = self.reader.html_to_markdown(feed.description)
+                    st.write(f"_{description}_")
+                    self._show_entries(feed.entries)
+                    print_yellow(selected_feed_url)
+
+                # The feed is stored by the on_click callback; the body below
+                # only resets the discovery state and refreshes the page.
+                if st.button(
+                    "Add RSS Feed",
+                    on_click=self.reader.add_rss_feed,
+                    args=[selected_feed_url],
+                ):
+                    del st.session_state["discovered_feeds"]
+                    st.success("RSS Feed added.")
+                    st.rerun()
+
+    def display_feed(self):
+        """Show title, description and the first entries of the selected feed."""
+        selected_feed = st.session_state["selected_feed"]
+        if not selected_feed:
+            return
+
+        self.reader.get_feed(selected_feed)
+        feed = self.reader.feed
+        # get_feed leaves the reader without a feed when the key is unknown.
+        if feed is None:
+            st.error("The selected feed could not be loaded.")
+            return
+
+        st.title(feed.title)
+        st.write(f"_{feed.description}_")
+        self._show_entries(feed.entries, with_link=True)
--- a/_llm.py
+++ b/_llm.py
@ -1,3 +1,4 @@
+import re
 import os
 from typing import Literal, Optional
 import requests
@ -5,20 +6,18 @@ from requests.auth import HTTPBasicAuth
 import tiktoken
 import json
 from colorprinter.print_color import *
-import env_manager
-import re
-
+import asyncio

+import env_manager
 env_manager.set_env()

 tokenizer = tiktoken.get_encoding("cl100k_base")

-print(os.getenv("LLM_API_USER"), os.getenv("LLM_API_PWD_LASSE"))
+
 class LLM:
    def __init__(
        self,
        system_message="You are an assistant.",
-        num_ctx=8192,
        temperature=0.01,
        model: Optional[Literal["small", "standard", "vision"]] = "standard",
        max_length_answer=4096,
@ -31,7 +30,6 @@ class LLM:

        Args:
            system_message (str): The initial system message for the assistant. Defaults to "You are an assistant.".
-            num_ctx (int): The number of context tokens to use. Defaults to 4096.
            temperature (float): The temperature setting for the model's response generation. Defaults to 0.01.
            chat (bool): Flag to indicate if the assistant is in chat mode. Defaults to True.
            model (str): The model type to use. Defaults to "standard". Alternatives: 'small', 'standard', 'vision'.
@ -43,7 +41,7 @@ class LLM:
        """
        self.model = self.get_model(model)
        self.system_message = system_message
-        self.options = {"temperature": temperature, "num_ctx": num_ctx}
+        self.options = {"temperature": temperature}
        self.messages = messages or [{"role": "system", "content": self.system_message}]
        self.max_length_answer = max_length_answer
        self.chat = chat
@ -68,73 +66,117 @@ class LLM:
                    tokens = tokenizer.encode(v)
                    num_tokens += len(tokens)
        return int(num_tokens)
-
+    
    def read_stream(self, response):
-        """
-        Reads a stream of data from the given response object and yields the content of each message.
-
-        Args:
-            response (requests.Response): The response object to read the stream from.
-
-        Yields:
-            str: The content of each message in the stream.
-
-        Notes:
-            - The response is expected to provide data in chunks, which are decoded as UTF-8.
-            - Lines are split by newline characters.
-            - Each line is expected to be a JSON object containing a "message" key with a "content" field.
-            - If a chunk cannot be decoded as UTF-8, it is skipped.
-            - If a line cannot be parsed as JSON, it is skipped.
-        """
        buffer = ""
        message = ""
+        first_chunk = True
+        prev_content = None  # Store the previous content chunk
        for chunk in response.iter_content(chunk_size=64):
            if chunk:
                try:
                    message_part = chunk.decode("utf-8")
                    buffer += message_part
                    message += message_part
-
                except UnicodeDecodeError:
                    continue
                while "\n" in buffer:
                    line, buffer = buffer.split("\n", 1)
-                    if line:
+                    if line.strip():
                        try:
                            json_data = json.loads(line)
-                            yield json_data["message"]["content"]
+                            content = json_data["message"]["content"]
+                            done = json_data.get("done", False)
+    
+                            # Remove leading '"' from the first content
+                            if first_chunk and content.startswith('"'):
+                                content = content[1:]
+                            first_chunk = False 
+    
+                            if done:
+                                # If the last content ends with '"', remove it
+                                if prev_content and prev_content.endswith('"'):
+                                    prev_content = prev_content[:-1]
+                                # Yield the last content
+                                if prev_content:
+                                    yield prev_content
+                                break
+                            else:
+                                # Yield the previous content before storing the current
+                                if prev_content:
+                                    yield prev_content
+                                prev_content = content
                        except json.JSONDecodeError:
                            continue
+        # Append the full message without leading/trailing quotes
        self.messages.append({"role": "assistant", "content": message.strip('"')})
+    def make_summary(self, text):
+        """Return a summary of *text* produced by the "small" model.
+
+        The request is sent without streaming and independently of
+        ``self.messages``, so the chat history is not modified.
+
+        Args:
+            text (str): The text to summarise.
+
+        Returns:
+            str: The content of the model's reply.
+        """
+        data = {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": """You are summarizing a text. Make it detailed and concise. Answer ONLY with the summary. Don't add any new information.""",
+                },
+                {
+                    "role": "user",
+                    "content": f'Summarise the text below:\n"""{text}"""\nRemember to be concise and detailed. Answer in English.',
+                },
+            ],
+            "stream": False,
+            "keep_alive": 3600 * 24 * 7,
+            "model": self.get_model("small"),
+            "options": {"temperature": 0.01},
+        }
+        response = requests.post(
+            os.getenv("LLM_API_URL"),
+            json=data,
+            auth=HTTPBasicAuth(
+                os.getenv("LLM_API_USER"), os.getenv("LLM_API_PWD_LASSE")
+            ),
+            # Same limit as the request in generate(); without a timeout a
+            # stalled server would block the caller indefinitely.
+            timeout=3600,
+        )
+        # Decode the body once and reuse it for logging and the return value.
+        summary = response.json()["message"]["content"]
+        print_blue("Summary:", summary)
+        return summary

    def generate(
        self,
-        query,
-        stream=False,
-        tools=None,
-        function_call=None,
+        query: str = None,
+        user_input: str = None,
+        context: str = None,
+        stream: bool = False,
+        tools: list = None,
+        function_call: dict = None,
        images: list = None,
        model: Optional[Literal["small", "standard", "vision"]] = None,
-        temperature=None,
+        temperature: float = None,
    ):
        """
-        Generates a response from the language model based on the provided query and options.
+        Generates a response from the language model based on the provided inputs.
+        If user_input is provided, it is included in the message history instead of the query.
+        If context is provided, it is summarised if len() > 2000 and included in the message history.
+
        Args:
-            query (str): The input query to be processed by the language model.
+            query (str, optional): The main query string to be processed by the model.
+            user_input (str, optional): User input to be included in the message history.
+            context (str, optional): Contextual information to be included in the message history.
            stream (bool, optional): Whether to stream the response. Defaults to False.
-            tools (list, optional): A list of tools to be used by the language model. Defaults to None.
-            function_call (dict, optional): A dictionary specifying a function call to be made by the language model. Defaults to None.
-            images (list, optional): A list of image paths or base64-encoded images to be included in the request. Defaults to None.
-            model (str, optional): The model alias to be used for generating the response. Defaults to None. Alternatives: 'small', 'standard', 'vision'.
+            tools (list, optional): List of tools to be included in the request.
+            function_call (dict, optional): Dictionary specifying a function call to be made.
+            images (list, optional): List of image paths or base64-encoded images to be included.
+            model (Optional[Literal["small", "standard", "vision"]], optional): The model type to be used. Defaults to None.
+            temperature (float, optional): The temperature setting for the model. Defaults to None.
+
        Returns:
            str: The generated response from the language model. If streaming is enabled, returns the streamed response.
        """
+
        # Add custom header if large model is chosen
        model = self.get_model(model) if model else self.model
        temperature = temperature if temperature else self.options["temperature"]

        # Normalize whitespace and add the query to the messages
+
        query = re.sub(r"\s*\n\s*", "\n", query)
+
        message = {"role": "user", "content": query}

        headers = {"Content-Type": "application/json"}
@ -158,6 +200,11 @@ class LLM:
                            base64_images.append(
                                base64.b64encode(image_file.read()).decode("utf-8")
                            )
+                elif isinstance(image, bytes):
+                    base64_images.append(base64.b64encode(image).decode("utf-8"))
+                else:
+                    print_red("Invalid image type")
+
            message["images"] = base64_images
            # Set the Content-Type header based on the presence of images
            headers = {"Content-Type": "application/json; images"}
@ -165,18 +212,15 @@ class LLM:
            # Set the model type to the vision model
            if self.chosen_backend:
                headers["X-Chosen-Backend"] = self.chosen_backend
-                

        self.messages.append(message)

        # Set the number of tokens to be the sum of the tokens in the messages and half of the max length of the answer
        if self.chat or len(self.messages) > 15000:
            num_tokens = self.count_tokens() + self.max_length_answer / 2
-            if num_tokens < 8000 and "num_ctx" in self.options:
-                del self.options["num_ctx"]
-            else:
+            if num_tokens > 8000:
                model = self.get_model("large")
-                headers["X-Model-Type"] = "standard_64k"
+                headers["X-Model-Type"] = "large"

        if tools:
            stream = False
@ -197,7 +241,9 @@ class LLM:
        if function_call:
            data["function_call"] = function_call

-
+        if data['model'] == 'small':
+            headers["X-Model-Type"] = "small"
+        
        response = requests.post(
            os.getenv("LLM_API_URL"),
            headers=headers,
@ -209,7 +255,21 @@ class LLM:
            timeout=3600,
        )

-        self.chosen_backend = response.headers.get('X-Chosen-Backend')
+        # If user_input is provided, change the last message to user_input and a summary of the context (if provided)
+        # This needs to be done after the request to LLM for the LLM to have the original message
+        if user_input:
+            if context:
+                if len(context) > 2000:
+                    context = self.make_summary(context)
+                user_input = f'''{user_input}\n\nUse the information below to answer the question.\n"""{context}"""\n[This is a summary of the context provided in the original message.]'''
+                system_message_info = "\nSometimes some of the messages in the chat history are summarised, then that is clearly indicated in the message."
+                if system_message_info not in self.messages[0]["content"]:
+                    self.messages[0]["content"] = (
+                        self.messages[0]["content"] + system_message_info
+                    )
+            self.messages[-1] = {"role": "user", "content": user_input}
+
+        self.chosen_backend = response.headers.get("X-Chosen-Backend")

        if response.status_code != 200:
            print_red("Error!")
@ -233,7 +293,9 @@ class LLM:
                    result = response_json["message"]
                else:
                    result = response_json["message"]["content"].strip('"')
-                    self.messages.append({"role": "assistant", "content": result.strip('"')})
+                    self.messages.append(
+                        {"role": "assistant", "content": result.strip('"')}
+                    )
            except requests.exceptions.JSONDecodeError:
                print_red("Error: ", response.status_code, response.text)
                return "An error occurred."
@ -242,6 +304,33 @@ class LLM:
                self.messages = [self.messages[0]]
            return result

+    async def async_generate(
+        self,
+        query: str = None,
+        user_input: str = None,
+        context: str = None,
+        stream: bool = False,
+        tools: list = None,
+        function_call: dict = None,
+        images: list = None,
+        model: Optional[Literal["small", "standard", "vision"]] = None,
+        temperature: float = None,
+    ):
+        """Awaitable wrapper that runs ``generate`` in the loop's default executor.
+
+        Takes the same arguments as ``generate`` and forwards them positionally,
+        in the order of this signature.
+
+        Returns:
+            Whatever ``generate`` returns for the given arguments.
+        """
+        forwarded = (
+            query,
+            user_input,
+            context,
+            stream,
+            tools,
+            function_call,
+            images,
+            model,
+            temperature,
+        )
+        event_loop = asyncio.get_event_loop()
+        # Passing None as executor selects the loop's default executor.
+        pending = event_loop.run_in_executor(None, self.generate, *forwarded)
+        return await pending
+    

 if __name__ == "__main__":
    llm = LLM()
--- a/_rss.py
+++ b/_rss.py
@ -0,0 +1,260 @@
+# _rss.py
+import feedparser
+import requests
+import urllib
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+from datetime import datetime, timedelta
+from utils import fix_key
+import os
+from _base_class import BaseClass
+from _llm import LLM
+from colorprinter.print_color import *
+
+
+class RSSFeed:
+    """Plain attribute container describing one RSS feed and its entries."""
+
+    def __init__(self):
+        # Every scalar field starts out unset.
+        for field_name in (
+            "url",
+            "title",
+            "icon_path",
+            "description",
+            "feed_data",
+            "fetched_timestamp",
+        ):
+            setattr(self, field_name, None)
+        # A fresh list per instance.
+        self.entries = []
+
+
+class RSSReader(BaseClass):
+    """Discover, fetch, store and render RSS feeds for one user."""
+
+    # Seconds to wait for a remote site before giving up on a request.
+    REQUEST_TIMEOUT = 15
+
+    def __init__(self, username):
+        """Create a reader for *username* with no feed loaded yet."""
+        super().__init__(username=username)
+        self.username = username
+        # NOTE(review): get_arango's first positional parameter is `admin`, so
+        # the username is bound to `admin` here, not to `db_name` - confirm
+        # that this is the intended database handle.
+        self.user_arango = self.get_arango(username)
+        self.feed: RSSFeed = None
+        self.arango_feed = None
+
+    def discover_feeds(self, url):
+        """Return the feeds found at *url*.
+
+        If *url* itself parses as a feed with entries, it is the only result.
+        Otherwise the page is downloaded and its ``<link rel="alternate">`` and
+        ``<a>`` tags are searched for feed candidates, which are then validated
+        with feedparser.
+
+        Args:
+            url: Website or feed URL; ``https://`` is prepended when the value
+                does not start with ``http``.
+
+        Returns:
+            list[dict]: One dict per feed with the keys ``href``, ``title`` and
+            ``icon``. Empty when nothing was found or an error occurred.
+        """
+        try:
+            if not url.startswith("http"):
+                url = "https://" + url
+
+            # Check if the input URL is already an RSS feed
+            direct = feedparser.parse(url)
+            if len(direct.entries) > 0:
+                return [
+                    {
+                        "href": url,
+                        "title": direct.feed.get("title", "No title"),
+                        "icon": self.get_site_icon(url),
+                    }
+                ]
+
+            # If not, proceed to discover feeds from the webpage
+            raw = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
+            html = BeautifulSoup(raw, "html.parser")
+
+            # Find the site icon
+            icon_url = self.get_site_icon(url, html)
+
+            possible_feeds = []
+
+            # Find all <link> tags with rel="alternate" and type containing "rss" or "xml"
+            for link in html.find_all("link", rel="alternate"):
+                link_type = link.get("type", None)
+                if link_type and ("rss" in link_type or "xml" in link_type):
+                    href = link.get("href", None)
+                    if href:
+                        possible_feeds.append(urljoin(url, href))
+
+            # Find all <a> tags with href containing "rss", "xml", or "feed"
+            parsed_url = urllib.parse.urlparse(url)
+            # netloc, unlike hostname, keeps an explicit port.
+            base = parsed_url.scheme + "://" + parsed_url.netloc
+            for anchor in html.find_all("a"):
+                href = anchor.get("href", None)
+                if href and ("rss" in href or "xml" in href or "feed" in href):
+                    possible_feeds.append(urljoin(base, href))
+
+            # Validate the possible feeds using feedparser; dict.fromkeys drops
+            # duplicates while keeping the order in which they were found.
+            result = []
+            for feed_url in dict.fromkeys(possible_feeds):
+                candidate = feedparser.parse(feed_url)
+                if len(candidate.entries) > 0:
+                    result.append(
+                        {
+                            "href": feed_url,
+                            "title": candidate.feed.get("title", "No title"),
+                            "icon": icon_url,
+                        }
+                    )
+
+            return result
+        except Exception as e:
+            print(f"Error discovering feeds: {e}")
+            return []
+
+    def add_rss_feed(self, url):
+        """Fetch the feed at *url* and store it together with a user reference.
+
+        Args:
+            url: URL of the feed to subscribe to.
+        """
+        self.load_feed_from_url(url=url)
+        self.feed._key = fix_key(self.feed.url)
+
+        # Store feed data in base_arango's rss_feeds collection. overwrite=True
+        # mirrors the user_feeds insert below, so adding a feed that is already
+        # stored refreshes it instead of failing on the duplicate key.
+        self.base_arango.db.collection("rss_feeds").insert(
+            dict(vars(self.feed)), overwrite=True
+        )
+
+        # Store a reference to the feed in user_arango's user_feeds collection
+        self.user_arango.db.collection("user_feeds").insert(
+            {
+                "_key": self.feed._key,  # Use the same key to reference the feed
+                "feed_key": self.feed._key,
+                "subscribed_on": datetime.now().isoformat(),
+                # Add additional user-specific fields here
+            },
+            overwrite=True,
+        )
+
+    def load_feed_from_url(self, url=None, data=None):
+        """Populate ``self.feed`` from a URL, from parsed data, or refresh it.
+
+        Args:
+            url: Feed URL to download and parse. Starts a new ``RSSFeed``.
+            data: Already parsed feed mapping with ``feed`` and ``entries``
+                keys (and optionally ``url``). Starts a new ``RSSFeed``.
+
+        With neither argument the currently loaded feed is parsed again from
+        its own URL.
+
+        Raises:
+            ValueError: If neither argument is given and no feed is loaded.
+        """
+        if url:
+            self.feed = RSSFeed()
+            self.feed.url = url
+            full_feed_data = feedparser.parse(url)
+        elif data:
+            self.feed = RSSFeed()
+            self.feed.url = data.get("url", None)
+            full_feed_data = data
+        else:
+            if self.feed is None or not self.feed.url:
+                raise ValueError("No feed is loaded; pass a url or data.")
+            full_feed_data = feedparser.parse(self.feed.url)
+
+        feed_info = full_feed_data["feed"]
+        self.feed.title = feed_info.get("title", "No title")
+        self.feed.description = feed_info.get("description", "No description")
+        self.feed.icon_path = self.get_site_icon(self.feed.url)
+        self.feed.entries = [
+            {
+                "title": entry.get("title", "No title"),
+                "link": entry.get("link"),
+                "published": entry.get("published"),
+                "summary": self.html_to_markdown(entry.get("summary", "No summary")),
+                "id": entry.get("id"),
+                "author": entry.get("author"),
+            }
+            for entry in full_feed_data["entries"]
+        ]
+        self.feed.fetched_timestamp = datetime.now().isoformat()
+
+    def feed_data2feed(self, data):
+        """Populate ``self.feed`` from an already parsed feed mapping."""
+        self.load_feed_from_url(data=data)
+
+    def parse_feed(self, url):
+        """Download and parse the feed at *url* and return the ``RSSFeed``."""
+        self.load_feed_from_url(url=url)
+        return self.feed
+
+    def update_feed(self):
+        """Re-fetch the loaded feed, persist the new entries and return them."""
+        self.load_feed_from_url()
+        # Update the feed in the database
+        # NOTE(review): feeds are inserted through base_arango in add_rss_feed
+        # but updated through user_arango here - confirm both handles point at
+        # the database that holds the rss_feeds documents.
+        self.user_arango.db.collection("rss_feeds").update(
+            {
+                "_key": self.feed._key,
+                "fetched_timestamp": self.feed.fetched_timestamp,
+                "entries": self.feed.entries,
+            }
+        )
+        return self.feed.entries
+
+    def get_feed(self, feed_key=None, url=None, _id=None):
+        """Load a stored feed into ``self.feed`` and return its entries.
+
+        The first truthy argument decides the lookup: by document key, by the
+        stored ``url`` attribute, or by the stored ``id`` attribute. Entries
+        fetched less than an hour ago are returned as stored; otherwise the
+        feed is refreshed through ``update_feed``.
+
+        Returns:
+            list | None: The entries, or None when no document matched or no
+            lookup argument was given.
+        """
+        database = self.base_arango.db
+        arango_doc = None
+        if feed_key:
+            arango_doc = database.collection("rss_feeds").get(feed_key)
+        elif url:
+            # Bind variables keep the value out of the query text.
+            cursor = database.aql.execute(
+                "FOR doc IN rss_feeds FILTER doc.url == @url LIMIT 1 RETURN doc",
+                bind_vars={"url": url},
+            )
+            arango_doc = next(iter(cursor), None)
+        elif _id:
+            # NOTE(review): this filters on the attribute `id`, not on the
+            # document handle `_id` - confirm which one is meant.
+            cursor = database.aql.execute(
+                "FOR doc IN rss_feeds FILTER doc.id == @id LIMIT 1 RETURN doc",
+                bind_vars={"id": _id},
+            )
+            arango_doc = next(iter(cursor), None)
+
+        if not arango_doc:
+            return None
+
+        self.feed = RSSFeed()
+        for attr, value in arango_doc.items():
+            setattr(self.feed, attr, value)
+
+        # A document without a timestamp is treated as stale.
+        if self.feed.fetched_timestamp:
+            fetched_time = datetime.fromisoformat(self.feed.fetched_timestamp)
+            if datetime.now() - fetched_time < timedelta(hours=1):
+                return self.feed.entries
+        return self.update_feed()
+
+    def get_site_icon(self, url, html=None):
+        """Return the absolute URL of the page's icon, or None.
+
+        Args:
+            url: Page URL; used to download the page when *html* is not given
+                and to resolve a relative icon link.
+            html: Optional, already parsed BeautifulSoup document.
+        """
+        try:
+            if not html:
+                raw = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
+                html = BeautifulSoup(raw, "html.parser")
+
+            # Prefer rel="icon"; fall back to the other common icon link.
+            for rel in ("icon", "shortcut icon"):
+                icon_link = html.find("link", rel=rel)
+                if icon_link:
+                    icon_url = icon_link.get("href", None)
+                    if icon_url:
+                        return urljoin(url, icon_url)
+
+            return None
+        except Exception as e:
+            print(f"Error getting site icon: {e}")
+            return None
+
+    def get_rss_feeds(self):
+        """Return all documents of the rss_feeds collection in user_arango."""
+        return list(self.user_arango.db.collection("rss_feeds").all())
+
+    def download_icon(self, icon_url, save_folder="external_icons"):
+        """Download *icon_url* into *save_folder* and return the local path.
+
+        Returns:
+            str | None: Path of the saved file, or None when the download
+            failed.
+        """
+        try:
+            os.makedirs(save_folder, exist_ok=True)
+
+            response = requests.get(
+                icon_url, stream=True, timeout=self.REQUEST_TIMEOUT
+            )
+            if response.status_code != 200:
+                print(f"Failed to download icon: {response.status_code}")
+                return None
+
+            # Name the file after the URL path only, so a query string does
+            # not end up in the file name.
+            icon_name = os.path.basename(urllib.parse.urlparse(icon_url).path)
+            icon_path = os.path.join(save_folder, icon_name)
+            with open(icon_path, "wb") as f:
+                for chunk in response.iter_content(1024):
+                    f.write(chunk)
+            return icon_path
+        except Exception as e:
+            print(f"Error downloading icon: {e}")
+            return None
+
+    def html_to_markdown(self, html):
+        """Convert a small HTML fragment to Markdown-flavoured plain text.
+
+        ``<br>`` becomes a newline, ``<strong>`` and ``<em>`` become ``**`` and
+        ``*`` emphasis, ``<p>`` is followed by a blank line, and all remaining
+        tags are stripped.
+
+        Args:
+            html: HTML string; None or an empty string yields "".
+        """
+        if not html:
+            return ""
+        soup = BeautifulSoup(html, "html.parser")
+        for br in soup.find_all("br"):
+            br.replace_with("\n")
+        for strong in soup.find_all("strong"):
+            strong.replace_with(f"**{strong.text}**")
+        for em in soup.find_all("em"):
+            em.replace_with(f"*{em.text}*")
+        for p in soup.find_all("p"):
+            p.replace_with(f"{p.text}\n\n")
+        return soup.get_text()
+
+    def get_full_content(self, url):
+        """Download the page at *url* and return its text without markup."""
+        result = requests.get(url, timeout=self.REQUEST_TIMEOUT)
+        soup = BeautifulSoup(result.content, "html.parser")
+        return soup.get_text()
+
+
+class RSSAnalyzer(BaseClass):
+    """Analyze RSS feeds with an LLM, using an RSSReader to access them."""
+
+    def __init__(self, username):
+        """Create the analyzer for *username*.
+
+        Args:
+            username: Name of the user whose feeds are analyzed.
+        """
+        # BaseClass.__init__ sets user_arango and base_arango, so no separate
+        # database handles are created here.
+        super().__init__(username=username)
+        self.llm = LLM(system_message="You are reading RSS Feeds to analyze them.")
+        # RSSReader takes only the username and builds its own handles.
+        self.rss_reader = RSSReader(username)
--- a/arango_admin.py
+++ b/arango_admin.py
@ -0,0 +1,6 @@
+from _arango import ArangoDB    
+
+
+# One-off migration: make sure every listed user database has an
+# ``rss_feeds`` collection.
+for db in ['lasse', 'nisse', 'torill', 'irma']:
+    arango  = ArangoDB(db_name=db)
+    # create_collection fails when the collection already exists; the guard
+    # keeps the script safe to re-run (same pattern as make_arango).
+    if not arango.db.has_collection('rss_feeds'):
+        arango.db.create_collection('rss_feeds')
--- a/article2db.py
+++ b/article2db.py
@ -238,11 +238,15 @@ class Processor:
        local_chroma_deployment: bool = False,
        process: bool = True,
        document_type: str = None,
+        username: str = None,
    ):
        self.document = document
        self.chromadb = ChromaDB(local_deployment=local_chroma_deployment, db=chroma_db)
        self.len_chunks = len_chunks
        self.document_type = document_type
+        self.filename = filename
+
+        self.username = username if username else document.username

        self._id = None

@ -353,7 +357,7 @@ class Processor:
            )
        else:
            chroma_collection = self.chromadb.db.get_or_create_collection(
-                "other_documents"
+                f"{self.username}__other_documents"
            )

        chroma_collection.add(ids=ids, documents=documents, metadatas=metadatas)
@ -454,11 +458,13 @@ class Processor:
            model="small",
            max_length_answer=500,
        )
-        text = pymupdf4llm.to_markdown(
-            self.document.pdf, page_chunks=False, show_progress=False, pages=[0, 1]
-        )
        if len(self.document.pdf) == 1:
            pages = [0]
+        else:
+            pages = [0, 1]
+        text = pymupdf4llm.to_markdown(
+            self.document.pdf, page_chunks=False, show_progress=False, pages=pages
+        )
        prompt = f'''
            Below is the beginning of an article. I want to know when it's published, the title, and the journal.

@ -468,7 +474,8 @@ class Processor:

            Answer ONLY with the information requested.
            I want to know the published date on the form "YYYY-MM-DD".
-            I want the full title of the article and the journal.
+            I want the full title of the article.
+            I want the name of the journal/paper/outlet where the article was published.
            Be sure to answer on the form "published_date;title;journal" as the answer will be used in a CSV.
            If you can't find the information, answer "not_found".
            '''
@ -553,9 +560,10 @@ class Processor:
        if response.status_code == 200:
            data = response.json()
            if data.get("results", []) == []:
-                print(f"DOI {doi} not found in DOAJ.")
+                print_yellow(f"{doi} not found in DOAJ.")
                return False
            else:
+                print_green(f"{doi} found in DOAJ.")
                return data
        else:
            print(
@ -648,11 +656,18 @@ class Processor:
                        only_meta=True
                    )
            if "_key" not in self.document.doc:
-                _key = (
-                    self.document.doi
-                    or self.document.title
-                    or self.document.get_title()
-                )
+
+                if self.document.doi:
+                    _key = self.document.doi
+                elif self.document.title:
+                    _key = self.document.title
+                elif self.document.get_title():
+                    _key = self.document.get_title()
+                elif 'title' in self.document.doc["metadata"] and self.document.doc["metadata"]["title"]:
+                    _key = self.document.doc["metadata"]["title"]
+                else:
+                    _key = self.document.pdf_file.name
+
                print_yellow(f"Document key: {_key}")
                print(self.document.doi, self.document.title, self.document.get_title())
                self.document.doc["_key"] = fix_key(_key)
--- a/manage_users.py
+++ b/manage_users.py
@ -0,0 +1,192 @@
+import yaml
+import sys
+import bcrypt
+from _arango import ArangoDB
+import os
+import dotenv
+import getpass
+import argparse
+import string
+import secrets
+from utils import fix_key
+from colorprinter.print_color import *
+
+dotenv.load_dotenv()
+
+
+def read_yaml(file_path):
+    """Load and return the YAML document stored at ``file_path``."""
+    with open(file_path, "r") as handle:
+        content = yaml.safe_load(handle)
+    return content
+
+
+def write_yaml(file_path, data):
+    """Serialize ``data`` as YAML into ``file_path``, replacing its content."""
+    with open(file_path, "w") as handle:
+        yaml.safe_dump(data, handle)
+
+
+def add_user(data, username, email, name, password):
+    """Register a new user in the credentials mapping held in ``data``.
+
+    Prints an error and exits with status 1 if the username or the email is
+    already present. Otherwise stores the email, name and a bcrypt hash of
+    ``password`` under ``data["credentials"]["usernames"][username]``.
+    """
+    users = data["credentials"]["usernames"]
+
+    # Usernames must be unique.
+    if username in users:
+        print(f"Error: Username '{username}' already exists.")
+        sys.exit(1)
+
+    # So must email addresses.
+    if any(existing["email"] == email for existing in users.values()):
+        print(f"Error: Email '{email}' already exists.")
+        sys.exit(1)
+
+    # Only the bcrypt hash is stored, never the plain password.
+    encoded_password = password.encode("utf-8")
+    hashed_password = bcrypt.hashpw(encoded_password, bcrypt.gensalt()).decode("utf-8")
+
+    users[username] = {
+        "email": email,
+        "name": name,
+        "password": hashed_password,
+    }
+
+
+def make_arango(username):
+    """Create the ArangoDB database and default collections for ``username``.
+
+    Connects with the root credentials from the environment
+    (``ARANGO_ROOT_USER`` / ``ARANGO_ROOT_PASSWORD``), creates a database
+    named after the user if it does not exist, creates any missing default
+    collections in it, and inserts an initial document into ``settings``.
+    """
+    root_user = os.getenv("ARANGO_ROOT_USER")
+    root_password = os.getenv("ARANGO_ROOT_PASSWORD")
+    # Database existence is checked and created through ``_system``.
+    arango = ArangoDB(user=root_user, password=root_password, db_name="_system")
+
+    if not arango.db.has_database(username):
+        # The application account (ARANGO_USER) is passed as a user of the
+        # new database.
+        arango.db.create_database(
+            username,
+            users=[
+                {
+                    "username": os.getenv("ARANGO_USER"),
+                    "password": os.getenv("ARANGO_PASSWORD"),
+                    "active": True,
+                    "extra": {},
+                }
+            ],
+        )
+    # Reconnect as root, this time to the user's own database.
+    arango = ArangoDB(user=root_user, password=root_password, db_name=username)
+    for collection in [
+        "projects",
+        "favorite_articles",
+        "article_collections",
+        "settings",
+        "chats",
+        "notes",
+        "other_documents",
+        "rss_feeds",
+    ]:
+        if not arango.db.has_collection(collection):
+            arango.db.create_collection(collection)
+    # NOTE(review): this insert runs on every call, including when the
+    # database already existed, and the document has no explicit ``_key`` --
+    # confirm that repeated settings documents are acceptable.
+    user_arango = ArangoDB(db_name=username)
+    user_arango.db.collection("settings").insert(
+        {"current_page": "Bot Chat", "current_project": None}
+    )
+
+
+def generate_random_password(length=16):
+    """Return a random password of three hyphen-separated 6-character groups.
+
+    Characters are drawn from ASCII letters and digits using ``secrets``.
+    NOTE: ``length`` is accepted but not used; the result is always
+    20 characters long (3 x 6 plus two hyphens).
+    """
+    alphabet = string.ascii_letters + string.digits
+    groups = []
+    for _ in range(3):
+        groups.append("".join(secrets.choice(alphabet) for _ in range(6)))
+    return "-".join(groups)
+
+def delete_user(data, username):
+    """Remove a user from the credentials data and from ArangoDB.
+
+    Deletes the entry from ``data["credentials"]["usernames"]``, drops the
+    user's own database if it exists, and strips the username from the
+    ``user_access`` list of documents in the shared ``base`` database.
+    Prints an error and exits with status 1 if the user does not exist.
+
+    Args:
+        data: Parsed credentials YAML; modified in place.
+        username: Name of the user to delete.
+    """
+    # Check if the user exists
+    if username not in data["credentials"]["usernames"]:
+        print(f"Error: Username '{username}' does not exist.")
+        sys.exit(1)
+
+    # Remove the user from the YAML data
+    del data["credentials"]["usernames"][username]
+
+    # Root credentials are read once and used for both connections.
+    root_user = os.getenv("ARANGO_ROOT_USER")
+    root_password = os.getenv("ARANGO_ROOT_PASSWORD")
+    base_arango = ArangoDB(user=root_user, password=root_password, db_name="base")
+    arango = ArangoDB(user=root_user, password=root_password, db_name="_system")
+
+    # Remove the user's database in ArangoDB
+    if arango.db.has_database(username):
+        arango.db.delete_database(username)
+
+    # Remove user access from documents in relevant collections
+    collections = ["sci_articles", "other_documents"]
+    for collection_name in collections:
+        documents = base_arango.db.aql.execute(
+            """
+            FOR doc IN @@collection_name
+                FILTER @username IN doc.user_access
+                RETURN {'_id': doc._id, 'user_access': doc.user_access}
+            """,
+            bind_vars={"username": username, "@collection_name": collection_name},
+        )
+        for document in documents:
+            # Drop every occurrence of the username; list.remove() would
+            # leave access in place if the name was listed more than once.
+            document['user_access'] = [
+                user for user in document['user_access'] if user != username
+            ]
+            base_arango.db.collection(collection_name).update(document)
+
+    print_green(f"User {username} deleted successfully.")
+
+
+def main():
+    """Command-line entry point: add or delete a user.
+
+    With ``--delete`` the user named by ``--user`` is removed. Otherwise a
+    user is added from ``--user``/``--email``/``--name`` (and optionally
+    ``--password``), or from interactive prompts when any of the three is
+    missing. A password is generated when none is given, or when the
+    ``--password`` value is shorter than 8 characters. Changes are written
+    back to ``streamlit_users.yaml``.
+    """
+    parser = argparse.ArgumentParser(description="Add or delete a user.")
+    parser.add_argument("--user", help="Username")
+    parser.add_argument("--email", help="Email address")
+    parser.add_argument("--name", help="Full name")
+    parser.add_argument("--password", help="Password")
+    parser.add_argument("--delete", action="store_true", help="Delete user")
+
+    args = parser.parse_args()
+
+    yaml_file = "streamlit_users.yaml"
+    data = read_yaml(yaml_file)
+
+    if args.delete:
+        if args.user:
+            username = args.user
+            delete_user(data, username)
+            write_yaml(yaml_file, data)
+        else:
+            print("Error: Username is required to delete a user.")
+            sys.exit(1)
+    else:
+        if args.user and args.email and args.name:
+            username = args.user
+            email = args.email
+            name = args.name
+            if args.password and len(args.password) >= 8:
+                password = args.password
+            else:
+                # Missing or too-short passwords are replaced by a generated one.
+                password = generate_random_password()
+                print_yellow("Generated password:", password)
+        else:
+            username = input("Enter username: ")
+            email = input("Enter email: ")
+            name = input("Enter name: ")
+            password = getpass.getpass("Enter password: ")
+            if not password:
+                password = generate_random_password()
+                print_yellow("Generated password:", password)
+
+        # The 'test' account is recreated on every run. delete_user() exits
+        # with an error for unknown users, so only call it when the account
+        # actually exists.
+        if username == 'test' and username in data["credentials"]["usernames"]:
+            delete_user(data, username)
+
+        email = email.lower().strip()
+        checked_username = fix_key(username)
+        if checked_username != username:
+            # Report the rejected name before replacing it, so the message
+            # shows what the user actually typed.
+            print_red(f"Username '{username}' contains invalid characters.")
+            print_yellow(f"Using '{checked_username}' instead.")
+            username = checked_username
+
+        add_user(data, username, email, name, password)
+        make_arango(username)
+        write_yaml(yaml_file, data)
+        print_green(f"User {username} added successfully.")
+
+if __name__ == "__main__":
+    main()
--- a/new_user.py
+++ b/new_user.py
@ -1,97 +0,0 @@
-import yaml
-import sys
-import bcrypt
-from _arango import ArangoDB
-import os
-import dotenv
-import getpass
-
-dotenv.load_dotenv()
-
-
-def read_yaml(file_path):
-    with open(file_path, "r") as file:
-        return yaml.safe_load(file)
-
-
-def write_yaml(file_path, data):
-    with open(file_path, "w") as file:
-        yaml.safe_dump(data, file)
-
-
-def add_user(data, username, email, name, password):
-    # Check for existing username
-    if username in data["credentials"]["usernames"]:
-        print(f"Error: Username '{username}' already exists.")
-        sys.exit(1)
-
-    # Check for existing email
-    for user in data["credentials"]["usernames"].values():
-        if user["email"] == email:
-            print(f"Error: Email '{email}' already exists.")
-            sys.exit(1)
-
-    # Hash the password using bcrypt
-    hashed_password = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode(
-        "utf-8"
-    )
-
-    # Add the new user
-    data["credentials"]["usernames"][username] = {
-        "email": email,
-        "name": name,
-        "password": hashed_password,
-    }
-
-
-def make_arango(username):
-    root_user = os.getenv("ARANGO_ROOT_USER")
-    root_password = os.getenv("ARANGO_ROOT_PASSWORD")
-    arango = ArangoDB(user=root_user, password=root_password, db_name="_system")
-    
-    if not arango.db.has_database(username):
-        arango.db.create_database(
-            username,
-            users=[
-                {
-                    "username": os.getenv("ARANGO_USER"),
-                    "password": os.getenv("ARANGO_PASSWORD"),
-                    "active": True,
-                    "extra": {},
-                }
-            ]
-        )
-    arango = ArangoDB(user=root_user, password=root_password, db_name=username)
-    for collection in ["projects", "favorite_articles", "article_collections", "settings", 'chats', 'notes', 'other_documents']:
-        if not arango.db.has_collection(collection):
-            arango.db.create_collection(collection)
-    user_arango = ArangoDB(db_name=username)
-    user_arango.db.collection("settings").insert(
-        {"current_page": 'Bot Chat', "current_project": None}
-    )
-
-
-def main():
-
-    yaml_file = "streamlit_users.yaml"
-    if len(sys.argv) == 5:
-        username = sys.argv[1]
-        email = sys.argv[2]
-        name = sys.argv[3]
-        password = sys.argv[4]
-    else:
-        username = input("Enter username: ")
-        email = input("Enter email: ")
-        name = input("Enter name: ")
-        password = getpass.getpass("Enter password: ")
-    
-
-    data = read_yaml(yaml_file)
-    add_user(data, username, email, name, password)
-    make_arango(username)
-    write_yaml(yaml_file, data)
-    print(f"User {username} added successfully.")
-
-
-if __name__ == "__main__":
-    main()
--- a/prompts.py
+++ b/prompts.py
@ -28,7 +28,7 @@ def get_assistant_prompt():
    You should not write a reference section as this will be added later.
    Format your answers in Markdown format. """

-def get_editor_prompt(project: "Project", tools: bool = False):
+def get_editor_prompt(project: "Project"):
    """Generates a coaching prompt for an editor to assist a reporter with a specific project.

    Args:
@ -51,14 +51,32 @@ def get_editor_prompt(project: "Project", tools: bool = False):

    return f'''You are an editor coaching a journalist who is working on the project "{project.name}". {description_string(project)}
    {notes_string}
-    When writing with the reporter you will also get other information, like excerpts from articles and other documents. Use the notes to put the information in context and help the reporter to move forward.
+    When writing with the reporter you will _often_ get other information, like excerpts from articles and other documents. Use the notes to put the information in context and help the reporter to move forward.
+    If no other information is provided, try to answer based on the conversation history. If there is no history, and you're requested to answer in a conversational way, don't pretent to know things you don't have information about.
    The project is a journalistic piece, so it is important that you help the reporter to be critical of the sources and to provide a balanced view of the topic.
    Be sure to understand what the reporter is asking and provide the information in a way that is helpful for the reporter to move forward. Try to understand if the reporter is asking for a specific piece of information or if they are looking for guidance on how to move forward, or just want to discuss the topic.
    If you need more information to answer the question, try to get it.
    '''

-def get_chat_prompt(user_input, content_string, role):
-    if role == "Research Assistant":
+def get_chat_prompt(user_input, role, content_string=None, content_attachment=None, image_attachment=False):
+    
+    if image_attachment:
+        return f'''{user_input}
+        Use the attached image to write your response.
+        '''
+
+    elif content_attachment:
+            return f'''{user_input} 
+        Content of the attached file:
+         """
+         {content_attachment}
+         """
+         Respond to "{user_input}" based on the information in the attachment.
+         Fomat your answer in a way that is easy to understand for a general audience, and in an basic Markdown format.
+         '''
+
+    
+    elif role == "Research Assistant":

        prompt = f'''{user_input}
        Below are snippets from different articles, often with title and date of publication.
@ -151,10 +169,9 @@ def get_image_system_prompt(project: "Project"):
    return re.sub(r"\s*\n\s*", "\n", system_message)

 def get_tools_prompt(user_input):
-    return f'''The reporter has asked: "{user_input}" 
-    What information is needed to answer the question? Choose one or many tools in order to answer the question. Make sure to read the description of the tools carefully before choosing.
-    If you are shure that you can answer the question in a correct way without fetching data, you can do that as well.
-
+    return f'''User message: "{user_input}" 
+    Choose one or many tools in order to answer the message. It's important that you think of what information (if any) is needed to make a good answer. 
+    Make sure to read the description of the tools carefully before choosing!
    '''


--- a/rss_analyzer.py
+++ b/rss_analyzer.py
--- a/streamlit_app.py
+++ b/streamlit_app.py
@ -8,12 +8,17 @@ from time import sleep
 from colorprinter.print_color import *
 from _arango import ArangoDB

+
 def get_settings():
    """
    Function to get the settings from the ArangoDB.
    """
    arango = ArangoDB(db_name=st.session_state["username"])
-    st.session_state["settings"] = arango.db.collection("settings").get("settings")
+    settings = arango.db.collection("settings").get("settings")
+    if settings:
+        st.session_state["settings"] = settings
+    else:
+        st.session_state["settings"] = {'current_collection': None, 'current_page': None}
    return st.session_state["settings"]


@ -49,7 +54,14 @@ if st.session_state["authentication_status"]:
    for _ in range(3):

        try:
-            from streamlit_pages import Article_Collections, Bot_Chat, Projects, Settings
+            from streamlit_pages import (
+                Article_Collections,
+                Bot_Chat,
+                Projects,
+                Settings,
+                RSS_Feeds
+            )
+
            break
        except ImportError as e:
            # Write the full error traceback
@ -57,25 +69,54 @@ if st.session_state["authentication_status"]:
            print_red(e)
            print("Retrying to import pages...")

-    get_settings()
-    if 'current_page' in st.session_state["settings"]:
+    st.session_state["settings"] = get_settings()
+    if isinstance(st.session_state["settings"], dict) and "current_page" in st.session_state["settings"]:
        st.session_state["current_page"] = st.session_state["settings"]["current_page"]
    else:
-        if 'current_page' not in st.session_state:
+        if "current_page" not in st.session_state:
            st.session_state["current_page"] = None

    if "not_downloaded" not in st.session_state:
        st.session_state["not_downloaded"] = {}
-    
+
    # Pages
    bot_chat = st.Page(Bot_Chat)
    projects = st.Page(Projects)
    article_collections = st.Page(Article_Collections)
    settings = st.Page(Settings)
-
-
-    pg = st.navigation([bot_chat, projects, article_collections, settings])
-    pg.run()
+    rss_feeds = st.Page(RSS_Feeds)
+
+    pg = st.navigation([bot_chat, projects, article_collections, rss_feeds, settings])
+    try:
+        pg.run()
+    except Exception as e:
+        print_red(e)
+        st.error("An error occurred. The site will be reloaded.")
+        import traceback
+        from datetime import datetime
+        from time import sleep
+
+        traceback_string = traceback.format_exc()
+        traceback.print_exc()
+        arango = ArangoDB(db_name="base")
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        print_rainbow(st.session_state.to_dict())
+        session_state = st.session_state.to_dict()
+        if 'bot' in session_state:
+            del session_state['bot']
+        arango.db.collection("error_logs").insert(
+            {
+                "error": traceback_string,
+                "_key": timestamp,
+                "session_state": session_state,
+            },
+            overwrite=True,
+        )
+        with st.status(":red[An error occurred. The site will be reloaded.]"):
+            for i in range(5):
+                sleep(1)
+                st.write(f"Reloading in {5-i} seconds...")
+        st.rerun()
    with st.sidebar:
        st.write("---")
        authenticator.logout()
@ -84,4 +125,4 @@ if st.session_state["authentication_status"]:
 elif st.session_state["authentication_status"] is False:
    st.error("Username/password is incorrect")
 elif st.session_state["authentication_status"] is None:
-    st.warning("Please enter your username and password")
+    st.warning("Please enter your username and password")
--- a/streamlit_chatbot.py
+++ b/streamlit_chatbot.py
@ -13,10 +13,15 @@ class Chat(BaseClass):
        self.name = kwargs.get("name", None)
        self.chat_history = kwargs.get("chat_history", [])
        self.role = role
+        self.message_attachments = None

    def add_message(self, role, content):
        self.chat_history.append(
-            {"role": role, "content": content.strip().strip('"'), "role_type": self.role}
+            {
+                "role": role,
+                "content": content.strip().strip('"'),
+                "role_type": self.role,
+            }
        )

    def to_dict(self):
@ -102,26 +107,27 @@ class Bot(BaseClass):
        if not self.collection and self.project:
            self.collection = self.project.collections

+        
        if not isinstance(self.collection, list):
            self.collection = [self.collection]
-        
+
        # Load articles in the collections
-        self.arango_ids  = []
+        self.arango_ids = []
        for collection in self.collection:
            for _id in self.user_arango.db.aql.execute(
-                '''
+                """
                FOR doc IN article_collections
                FILTER doc.name == @collection
                FOR article IN doc.articles
                    RETURN article._id
-                ''',
+                """,
                bind_vars={"collection": collection},
-                ):
+            ):
                self.arango_ids.append(_id)
-        
+
        self.chosen_backend = kwargs.get("chosen_backend", None)

-        self.chatbot: LLM = None
+        self.chatbot: LLM = LLM()
        self.tools: list[dict] = None

        self.chatbot_memory = None
@ -138,13 +144,17 @@ class Bot(BaseClass):

        self.toolbot = LLM(
            temperature=0,
-            system_message="Choose one or many tools to use in order to assist the user. Make sure to read the description of the tools carefully.",
+            system_message="""
+            You are an assistant bot helping an answering bot to answer a user's messages. 
+            Your task is to choose one or multiple tools that will help the answering bot to provide the user with the best possible answer.
+            Try to understand if the answering bot needs any information to answer the user's message, and if so, choose the tool that will provide that information.
+            You should NEVER try to answer the user's message yourself, only choose the tool that will help the answering bot to answer the user's message.
+            ** Make sure to read the description of the tools carefully! **
+            You MUST choose a tool, if no additional information is needed, choose "conversational_response".""",
            chat=False,
            model="small",
        )

-
-
        # self.sidebar_content()

    def sidebar_content(self):
@ -265,27 +275,52 @@ class Bot(BaseClass):

        return grouped_chunks

-    def process_user_input(self, user_input):
+    def process_user_input(self, user_input, content_attachment=None):

        # Add user's message to chat history
        self.chat.add_message("user", user_input)

        # Generate response with tool support
-        prompt = get_tools_prompt(user_input)
-        response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
-        print_yellow("Tool to use")
-        # Check if the LLM wants to use a tool
-        if isinstance(response, dict) and "tool_calls" in response:
-            bot_response = self.answer_tool_call(response, user_input)
+        if not content_attachment:
+            prompt = get_tools_prompt(user_input)
+            response = self.toolbot.generate(prompt, tools=self.tools, stream=False)
+            # Check if the LLM wants to use a tool
+            if isinstance(response, dict) and "tool_calls" in response:
+                print_yellow("Tool(s) to use:", response["tool_calls"])
+                bot_response = self.answer_tool_call(
+                    response, user_input=user_input
+                )

+            else:
+                # Use the LLM's direct response
+                bot_response = response.strip('"')
+                with st.chat_message(
+                    "assistant", avatar=self.chat.get_avatar(role="assitant")
+                ):
+                    st.write(bot_response)
        else:
-            # Use the LLM's direct response
-            bot_response = response.strip('"')
            with st.chat_message(
-                "assistant", avatar=self.chat.get_avatar(role="assitant")
+                "assistant", avatar=self.chat.get_avatar(role="assistant")
            ):
-                st.write(bot_response)
-
+                with st.spinner("Reading the content..."):
+                    if self.chat.message_attachments == 'image':
+                        prompt = get_chat_prompt(
+                            user_input, role=self.chat.role, image_attachment=True
+                        )                        
+                        print_yellow("Content attachment:", type(content_attachment))
+                        bot_response = self.chatbot.generate(
+                            prompt,
+                            stream=False,
+                            images=[content_attachment],
+                            model="vision",
+                        )
+                        st.write(bot_response)
+                    else:
+                        prompt = get_chat_prompt(
+                            user_input, content_attachment=content_attachment, role=self.chat.role
+                        )
+                        response = self.chatbot.generate(prompt, stream=True)
+                        bot_response = st.write_stream(response)
        # Add assistant's message to chat history
        if self.chat.chat_history[-1]["role"] != "assistant":
            self.chat.add_message("assistant", bot_response)
@ -323,6 +358,7 @@ class Bot(BaseClass):
                ]:
                    chunks = getattr(self, function_name)(**arguments)
                    # Provide the tool's output back to the LLM
+                    
                    response = self.generate_from_chunks(user_input, chunks)
                    bot_response = st.write_stream(response)
                    bot_response = bot_response.strip('"')
@ -357,7 +393,7 @@ class Bot(BaseClass):
        for note in notes:
            notes_string += f"\n# {note['title']}\n{note['content']}\n---\n"

-        prompt = get_chat_prompt(user_input, notes_string, role=self.chat.role)
+        prompt = get_chat_prompt(user_input, content_string=notes_string, role=self.chat.role)

        with st.spinner("Reading project notes..."):
            return self.chatbot.generate(prompt, stream=True)
@ -377,7 +413,7 @@ class Bot(BaseClass):
                f"{chunks_content_string}\n---\n"
            )

-        prompt = get_chat_prompt(user_input, chunks_string, role=self.chat.role)
+        prompt = get_chat_prompt(user_input, content_string=chunks_string, role=self.chat.role)

        magazines = list(
            set(
@ -394,7 +430,7 @@ class Bot(BaseClass):
            s = "Reading articles..."
        with st.spinner(s):
            return (
-                self.chatbot.generate(prompt, stream=True)
+                self.chatbot.generate(prompt, user_input=user_input, context=chunks_string, stream=True)
                if self.chatbot
                else self.llm.generate(prompt, stream=True)
            )
@ -405,8 +441,43 @@ class Bot(BaseClass):

        # Display chat history
        self.chat.show_chat_history()
+        self.attachment = 'image'
+
+        if user_input := st.chat_input("Write your message here...", accept_file=True):
+            user_input.text = user_input.text.replace('"""', '---')
+            if len(user_input.files) > 1:
+                st.error("Please upload only one file at a time.")
+            if user_input.files:
+                print(user_input.files)
+                attached_file = user_input.files[0]
+                if attached_file.type == "application/pdf":
+                    # Read the PDF content
+                    pdf_content = attached_file.read()
+                    # Open the PDF with PyMuPDF
+                    import fitz
+
+                    pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
+                    content_attachment = ""
+                    for page_num in range(len(pdf_document)):
+                        page = pdf_document.load_page(page_num)
+                        content_attachment += page.get_text()
+                    print_green("PDF text:", content_attachment)
+                elif (
+                    attached_file.type == "image/png"
+                    or attached_file.type == "image/jpeg"
+                ):
+                    self.chat.message_attachments = 'image'
+                    content_attachment = attached_file.read()
+                    with st.chat_message(
+                        "user", avatar=self.chat.get_avatar(role="user")
+                    ):
+                        st.image(content_attachment)
+
+            else:
+                content_attachment = None
+
+            user_input = user_input.text

-        if user_input := st.chat_input("Write your message here..."):
            with st.chat_message("user", avatar=self.chat.get_avatar(role="user")):
                st.write(user_input)
            if not self.chat.name:
@ -428,7 +499,7 @@ class Bot(BaseClass):
                        }
                    )
                    self.chat_key = chat_doc["_key"]
-            self.process_user_input(user_input)
+            self.process_user_input(user_input, content_attachment)
            self.update_session_state()

    def get_notes(self):
@ -453,6 +524,11 @@ class Bot(BaseClass):
        },
    )
    def fetch_science_articles(self, query: str, n_documents: int):
+        n_documents = int(n_documents)
+        if n_documents < 3:
+            n_documents = 3
+        elif n_documents > 10:
+            n_documents = 10
        return self.get_chunks(
            query, collections=["sci_articles"], n_results=n_documents
        )
@ -472,8 +548,13 @@ class Bot(BaseClass):
        },
    )
    def fetch_other_documents(self, query: str, n_documents: int):
+        n_documents = int(n_documents)
+        if n_documents < 2:
+            n_documents = 2
+        elif n_documents > 10:
+            n_documents = 10
        return self.get_chunks(
-            query, collections=["other_documents"], n_results=n_documents
+            query, collections=[f"{self.username}__other_documents"], n_results=n_documents
        )

    @ToolRegistry.register(
@ -491,25 +572,35 @@ class Bot(BaseClass):
        },
    )
    def fetch_science_articles_and_other_documents(self, query: str, n_documents: int):
+        n_documents = int(n_documents)
+        if n_documents < 3:
+            n_documents = 3
+        elif n_documents > 10:
+            n_documents = 10
        return self.get_chunks(
            query,
-            collections=["sci_articles", "other_documents"],
+            collections=["sci_articles", f"{self.username}__other_documents"],
            n_results=n_documents,
        )

    @ToolRegistry.register(
        name="fetch_notes",
-        description="Fetches information from the project notes when you as an editor need context from the project notes to understand other information. ONLY use this together with other tools!",
+        description="Fetches information from the project notes when you as an editor need context from the project notes to understand other information. ONLY use this together with other tools! No arguments needed.",
    )
    def fetch_notes(self):
        return self.get_notes()

    @ToolRegistry.register(
        name="conversational_response",
-        description="Generates a conversational response without fetching data. Use this ONLY if it is obvious that the user is not looking for information but only wants to chat.",
+        description="Let the answering bot write a response without fetching data. Use this ONLY if it is obvious that the user is not looking for information but only wants to smalltalk (like saying 'hi'). No arguments or needed.",
    )
    def conversational_response(self, query: str):
-        query = f'User message: "{query}". Make your answer short and conversational. Include a very brief description of the project if you think that would be helpful.'
+        query = f"""
+        User message: "{query}". 
+        Make your answer short and conversational. 
+        This is perhaps not a conversation about a journalistic project, so don't try to be too informative.
+        Don't answer with anything you're not sure of! 
+        """
        result = (
            self.chatbot.generate(query, stream=True)
            if self.chatbot
@ -550,7 +641,6 @@ class ResearchAssistantBot(Bot):
        self.tools = ToolRegistry.get_tools(
            tools=[
                "fetch_science_articles",
-                "fetch_other_documents",
                "fetch_science_articles_and_other_documents",
            ]
        )
@ -572,7 +662,11 @@ class PodBot(Bot):
        self.instructions = instructions
        self.guest_name = kwargs.get("name_guest", "Merit")
        self.hostbot = HostBot(
-            Chat(username=self.username, role="Host"), subject, username, instructions=instructions, **kwargs
+            Chat(username=self.username, role="Host"),
+            subject,
+            username,
+            instructions=instructions,
+            **kwargs,
        )
        self.guestbot = GuestBot(
            Chat(username=self.username, role="Guest"),
@ -583,7 +677,7 @@ class PodBot(Bot):
        )

    def run(self):
-        
+
        notes = self.get_notes()
        notes_string = ""
        if self.instructions:
@ -607,12 +701,11 @@ class PodBot(Bot):
        Say hello to the expert and start the interview. Remember to keep the interview to the subject of {self.subject} throughout the conversation.
        '''

+        # Stop button for the podcast
        with st.sidebar:
-            stop = st.button("Stop the podcast")
-            if stop:
-                st.session_state["make_podcast"] = False
+            stop = st.button("Stop podcast", on_click=self.stop_podcast)
+        
        while st.session_state["make_podcast"]:
-            
            # Stop the podcast if there are more than 14 messages in the chat
            self.chat.show_chat_history()
            if len(self.chat.chat_history) == 14:
@ -633,8 +726,7 @@ class PodBot(Bot):
                stream=False,
            )
            if "tool_calls" in _q:
-                print_yellow("Tool call response (host)", _q)
-                print_purple("HOST", self.hostbot.chat.role)
+                print_yellow("Tool call response (host)", _q['tool_calls'])
                q = self.hostbot.answer_tool_call(_q, a)
            else:
                q = _q
@ -653,26 +745,33 @@ class PodBot(Bot):
                a = _a
            self.chat.add_message("Guest", a)

-
            self.update_session_state()

-
+    def stop_podcast(self):
+        """Stop the podcast; registered as the ``on_click`` callback of the sidebar stop button."""
+        # run() loops while this flag is truthy, so clearing it ends the podcast.
+        st.session_state["make_podcast"] = False
+        self.update_session_state()
+        # NOTE(review): prints the entire session state to the console — looks like debug output.
+        print_rainbow(st.session_state.to_dict())
+        self.chat.show_chat_history()
+        
 class HostBot(Bot):
-    def __init__(self, chat: Chat, subject: str, username: str, instructions: str, **kwargs):
+    def __init__(
+        self, chat: Chat, subject: str, username: str, instructions: str, **kwargs
+    ):
        super().__init__(chat=chat, username=username, **kwargs)
        self.chat.role = kwargs.get("role", "Host")
        self.tools = ToolRegistry.get_tools(
            tools=[
                "fetch_notes",
                "conversational_response",
-                "fetch_other_documents",
+                #"fetch_other_documents", #TODO Should this be included?
            ]
        )
        self.instructions = instructions
        self.llm = LLM(
            system_message=f'''
-            You are the host of a podcast and an expert on {subject}. You will ask one question at a time about the subject, and then wait for the answer. 
+            You are the host of a podcast and an expert on {subject}. You will ask one question at a time about the subject, and then wait for the guest to answer. 
            Don't ask the guest to talk about herself/himself, only about the subject.
+            Make your questions short and clear, only if necessary add a brief context to the question.
            These are the instructions for the podcast from the producer:
            """
            {self.instructions}
@ -682,11 +781,11 @@ class HostBot(Bot):
        )
        self.toolbot = LLM(
            temperature=0,
-            system_message='''
+            system_message="""
            You are assisting a podcast host in asking questions to an expert. 
            Choose one or many tools to use in order to assist the host in asking relevant questions. 
-            Often "conversational_response" is enough, but sometimes notes are needed or even other documents. 
-            Make sure to read the description of the tools carefully!''',
+            Often "conversational_response" is enough, but sometimes project notes are needed. 
+            Make sure to read the description of the tools carefully!""",
            chat=False,
            model="small",
        )
@ -709,6 +808,7 @@ class GuestBot(Bot):
            system_message=f"""
            You are {kwargs.get('name', 'Merit')}, an expert on {subject}. 
            Today you are a guest in a podcast about {subject}. A host will ask you questions about the subject and you will answer by using scientific facts and information.
+            When answering, don't say things like "based on the documents" or alike, as neither the host nor the audience can see the documents. Act just as if you were talking to someone in a conversation.
            Try to be concise when answering, and remember that the audience of the podcast is not expert on the subject, so don't complicate things too much.
            It's very important that you answer in a "spoken" way, as if you were talking to someone in a conversation. That means you should avoid using scientific jargon and complex terms, too many figures or abstract concepts. 
            Lists are also not recommended, instead use "for the first reason", "secondly", etc.
--- a/streamlit_pages.py
+++ b/streamlit_pages.py
@ -18,7 +18,7 @@ def Bot_Chat():
    Function to handle the Chat Bot page.
    """
    from _classes import BotChatPage
-    if 'bot_chat_page' not in st.session_state:
+    if 'Bot Chat' not in st.session_state:
        st.session_state['Bot Chat'] = {}
    chatpage = BotChatPage(username=st.session_state["username"])
    chatpage.run()
@ -28,7 +28,7 @@ def Article_Collections():
    Function to handle the Article Collections page.
    """
    from _classes import ArticleCollectionsPage
-    if 'article_collections' not in st.session_state:
+    if 'Article Collections' not in st.session_state:
        st.session_state['Article Collections'] = {}

    article_collection = ArticleCollectionsPage(username=st.session_state["username"])
@ -42,3 +42,15 @@ def Settings():
    from _classes import SettingsPage
    settings = SettingsPage(username=st.session_state["username"])
    settings.run()
+
+
+def RSS_Feeds():
+    """
+    Entry point for the RSS Feeds page: make sure the page has a slot in
+    session state, then build the page object and run it.
+    """
+    from _classes import RSSFeedsPage
+
+    page_key = 'RSS Feeds'
+    if page_key not in st.session_state:
+        st.session_state[page_key] = {}
+
+    RSSFeedsPage(username=st.session_state["username"]).run()
--- a/streamlit_rss_old.py
+++ b/streamlit_rss_old.py
@ -0,0 +1,345 @@
+import os
+import urllib
+import streamlit as st
+from _base_class import BaseClass
+import feedparser
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin
+from utils import fix_key
+from colorprinter.print_color import *
+from datetime import datetime, timedelta
+
+
+class RSSFeedsPage(BaseClass):
+    def __init__(self, username: str):
+        """Create the page and restore any attributes saved under its session-state key."""
+        super().__init__(username=username)
+        self.page_name = "RSS Feeds"
+
+        # Re-apply whatever an earlier run stored for this page
+        saved_attributes = st.session_state.get(self.page_name, {})
+        for attribute_name, attribute_value in saved_attributes.items():
+            setattr(self, attribute_name, attribute_value)
+
+    def run(self):
+        """Render the page: the selected feed first, then the sidebar controls."""
+        # display_feed() reads this key unconditionally, so make sure it exists.
+        if "selected_feed" not in st.session_state:
+            st.session_state["selected_feed"] = None
+        self.update_current_page(self.page_name)
+        self.display_feed()
+
+        self.sidebar_actions()
+
+        # Persist state to session_state
+        self.update_session_state(page_name=self.page_name)
+
+    def select_rss_feeds(self):
+        """
+        Let the user pick one of their saved feeds in the sidebar.
+
+        The ``_key`` of the chosen feed is stored in
+        ``st.session_state["selected_feed"]``. The selectbox keeps returning the
+        chosen title on every script run, so a rerun is requested only when the
+        selection actually changed; rerunning unconditionally would restart the
+        script forever once a feed is selected.
+        """
+        # Fetch RSS feeds from the user's ArangoDB collection
+        rss_feeds = self.get_rss_feeds()
+        if not rss_feeds:
+            st.write("You have no RSS feeds added.")
+            return
+
+        feed_options = [feed["title"] for feed in rss_feeds]
+        with st.sidebar:
+            st.subheader("Show your feeds")
+            selected_feed_title = st.selectbox(
+                "Select a feed", options=feed_options, index=None
+            )
+        if not selected_feed_title:
+            return
+
+        # First feed carrying the chosen title (titles are not guaranteed unique)
+        selected_key = next(
+            feed["_key"] for feed in rss_feeds if feed["title"] == selected_feed_title
+        )
+        if st.session_state.get("selected_feed") != selected_key:
+            st.session_state["selected_feed"] = selected_key
+            st.rerun()
+
+    def get_rss_feeds(self):
+        """Return every document in the ``rss_feeds`` collection of the user's database as a list."""
+        return list(self.user_arango.db.collection("rss_feeds").all())
+
+    def sidebar_actions(self):
+        """
+        Render the sidebar: the feed selector plus the discover / preview / add flow.
+
+        The "Add RSS Feed" button is only drawn on the run triggered by clicking
+        "Preview Feed". Clicking it starts a new run in which "Preview Feed" is
+        False again, so code nested under the add button would never execute.
+        Everything that must happen after adding a feed is therefore done in the
+        button's ``on_click`` callback, which Streamlit runs before that rerun.
+        """
+        with st.sidebar:
+            # Select a feed to show
+            self.select_rss_feeds()
+            st.subheader("Add a New RSS Feed")
+            # Flag set by the add callback; show the confirmation exactly once
+            if st.session_state.get("rss_feed_added"):
+                del st.session_state["rss_feed_added"]
+                st.success("RSS Feed added.")
+            rss_url = st.text_input("Website URL or RSS Feed URL")
+            if st.button("Discover Feeds"):
+                if rss_url:
+                    with st.spinner("Discovering feeds..."):
+                        feeds = self.discover_feeds(rss_url)
+                    if feeds:
+                        st.session_state["discovered_feeds"] = feeds
+                        st.rerun()
+                    else:
+                        st.error("No RSS feeds found at the provided URL.")
+            if "discovered_feeds" in st.session_state:
+                st.subheader("Select a Feed to Add")
+                feeds = st.session_state["discovered_feeds"]
+                feed_options = [f"{feed['title']} ({feed['href']})" for feed in feeds]
+                selected_feed = st.selectbox("Available Feeds", options=feed_options)
+                selected_feed_url = feeds[feed_options.index(selected_feed)]["href"]
+                if st.button("Preview Feed"):
+                    feed_data = feedparser.parse(selected_feed_url)
+                    st.write(f"{feed_data.feed.get('title', 'No title')}")
+                    description = html_to_markdown(
+                        feed_data.feed.get("description", "No description")
+                    )
+                    st.write(f"_{description}_")
+                    for entry in feed_data.entries[:5]:
+                        with st.expander(entry.title):
+                            summary = (
+                                entry.summary
+                                if "summary" in entry
+                                else "No summary available"
+                            )
+                            markdown_summary = html_to_markdown(summary)
+                            st.markdown(markdown_summary)
+                    st.button(
+                        "Add RSS Feed",
+                        on_click=self._add_feed_and_reset,
+                        args=(selected_feed_url, feed_data, description),
+                    )
+
+    def _add_feed_and_reset(self, url, feed_data, description):
+        """Button callback: store the feed, close the discovery UI and flag the success message."""
+        self.add_rss_feed(url, feed_data, description)
+        if "discovered_feeds" in st.session_state:
+            del st.session_state["discovered_feeds"]
+        st.session_state["rss_feed_added"] = True
+
+    def discover_feeds(self, url):
+        """
+        Find RSS feeds at a URL.
+
+        The URL is first tried as a feed itself. If it has no entries, the page is
+        downloaded and candidate feed links are collected from
+        ``<link rel="alternate">`` tags and from ``<a>`` tags whose href mentions
+        "rss", "xml" or "feed". Each candidate is kept only if feedparser finds
+        entries in it.
+
+        Args:
+            url (str): Website or feed URL; "https://" is prepended when it does not
+                start with "http".
+
+        Returns:
+            list[dict]: One dict per feed with the keys "href", "title" and "icon".
+                Empty when nothing is found or any error occurs.
+        """
+        try:
+            if not url.startswith("http"):
+                url = "https://" + url
+
+            # Check if the input URL is already an RSS feed
+            direct_feed = feedparser.parse(url)
+            if direct_feed.entries:
+                return [
+                    {
+                        "href": url,
+                        "title": direct_feed.feed.get("title", "No title"),
+                        "icon": self.get_site_icon(url),
+                    }
+                ]
+
+            # If not, proceed to discover feeds from the webpage.
+            # The timeout keeps an unresponsive host from hanging the page.
+            raw = requests.get(url, timeout=10).text
+            html = BeautifulSoup(raw, "html.parser")
+
+            # Find the site icon
+            icon_url = self.get_site_icon(url, html)
+
+            possible_feeds = []
+
+            # <link rel="alternate"> tags whose type contains "rss" or "xml"
+            for link_tag in html.find_all("link", rel="alternate"):
+                link_type = link_tag.get("type", None)
+                href = link_tag.get("href", None)
+                if href and link_type and ("rss" in link_type or "xml" in link_type):
+                    possible_feeds.append(urljoin(url, href))
+
+            # <a> tags with href containing "rss", "xml", or "feed".
+            # Resolve against the page URL (not scheme + hostname) so the port is
+            # kept and relative links resolve the way a browser would.
+            for anchor in html.find_all("a"):
+                href = anchor.get("href", None)
+                if href and ("rss" in href or "xml" in href or "feed" in href):
+                    possible_feeds.append(urljoin(url, href))
+
+            # Validate the possible feeds using feedparser.
+            # dict.fromkeys de-duplicates while keeping discovery order.
+            result = []
+            for feed_url in dict.fromkeys(possible_feeds):
+                candidate = feedparser.parse(feed_url)
+                if candidate.entries:
+                    result.append(
+                        {
+                            "href": feed_url,
+                            "title": candidate.feed.get("title", "No title"),
+                            "icon": icon_url,
+                        }
+                    )
+
+            return result
+        except Exception as e:
+            print(f"Error discovering feeds: {e}")
+            return []
+
+
+    def add_rss_feed(self, url, feed_data, description):
+        """
+        Save a feed in the user's database and make sure the base database has it too.
+
+        The feed document is always written to the user's ``rss_feeds`` collection.
+        If the base database already holds a document with the same key it is
+        refreshed through update_feed(); otherwise the same document is inserted there.
+
+        Args:
+            url (str): Feed URL; also the source of the document key (via fix_key).
+            feed_data: Parsed feed; must support ``feed_data["feed"]``.
+            description (str): Description text stored with the feed.
+        """
+        # Prefer the image advertised by the feed; otherwise look on the site
+        try:
+            icon_url = feed_data["feed"]["image"]["href"]
+        except Exception:
+            icon_url = self.get_site_icon(url)
+
+        title = feed_data["feed"].get("title", "No title")
+        print_blue(title)
+        icon_path = download_icon(icon_url) if icon_url else None
+        _key = fix_key(url)
+
+        def build_document(timestamp):
+            # Same document shape for both databases; only the timestamp differs
+            return {
+                "_key": _key,
+                "url": url,
+                "title": title,
+                "icon_path": icon_path,
+                "description": description,
+                'fetched_timestamp': timestamp,
+                'feed_data': feed_data,
+            }
+
+        user_feeds = self.user_arango.db.collection("rss_feeds")
+        user_feeds.insert(build_document(datetime.now().isoformat()), overwrite=True)
+
+        existing_feed = self.get_feed_from_arango(_key)
+        now_timestamp = datetime.now().isoformat()
+        if existing_feed:
+            self.update_feed(_key, existing_feed)
+            return
+
+        base_feeds = self.base_arango.db.collection("rss_feeds")
+        base_feeds.insert(
+            build_document(now_timestamp),
+            overwrite=True,
+            overwrite_mode="update",
+        )
+    def update_feed(self, feed_key, feed=None):
+        """
+        Re-fetch an RSS feed that already exists in the ArangoDB base database.
+
+        The feed is parsed again from its stored URL, the current user is added to
+        the feed's ``users`` list, and the new data, timestamp and user list are
+        written back to the base database.
+
+        Args:
+            feed_key (str): The key identifying the feed in the database.
+            feed (dict, optional): The feed document, if the caller already has it.
+                Loaded by ``feed_key`` when omitted.
+
+        Returns:
+            The parsed feed data returned by feedparser.
+
+        Raises:
+            ValueError: If no feed document exists for ``feed_key``.
+            Exception: If there is an error updating the feed in the database.
+        """
+        if not feed:
+            feed = self.get_feed_from_arango(feed_key)
+        if not feed:
+            raise ValueError(f"No RSS feed with key {feed_key!r} in the base database")
+
+        feed_data = feedparser.parse(feed["url"])
+        print_rainbow(feed_data['feed'])
+        feed["feed_data"] = feed_data
+
+        # Copy so the stored list is not mutated through an alias
+        users = list(feed.get("users") or [])
+        if self.username not in users:
+            users.append(self.username)
+        feed["users"] = users
+        fetched_timestamp = datetime.now().isoformat()  # Convert datetime to ISO format string
+
+        # Persist the refreshed data; "users" must be included or the
+        # subscription computed above is lost.
+        self.base_arango.db.collection("rss_feeds").update(
+            {
+                "_key": feed["_key"],
+                "fetched_timestamp": fetched_timestamp,
+                "feed_data": feed_data,
+                "users": users,
+            }
+        )
+        return feed_data
+    
+
+    def update_session_state(self, page_name=None):
+        """Store this object's attribute dict in session state under ``page_name``; do nothing without one."""
+        if not page_name:
+            return
+        st.session_state[page_name] = self.__dict__
+
+    def get_site_icon(self, url, html=None):
+        """
+        Return the absolute URL of the site's icon, or None.
+
+        Args:
+            url (str): Page URL; downloaded when ``html`` is not given and used as
+                the base for resolving the icon href.
+            html: Already-parsed page (BeautifulSoup), if the caller has one.
+
+        Returns:
+            str | None: Icon URL, or None when no icon link is found or an error occurs.
+        """
+        try:
+            if not html:
+                page_source = requests.get(url).text
+                html = BeautifulSoup(page_source, "html.parser")
+
+            # Try rel="icon" first, then fall back to rel="shortcut icon"
+            for rel_value in ("icon", "shortcut icon"):
+                link_tag = html.find("link", rel=rel_value)
+                if not link_tag:
+                    continue
+                href = link_tag.get("href", None)
+                if href:
+                    return urljoin(url, href)
+
+            return None
+        except Exception as e:
+            print(f"Error getting site icon: {e}")
+            return None
+
+    def get_feed_from_arango(self, feed_key):
+        """
+        Retrieve an RSS feed from the ArangoDB base database.
+
+        Args:
+            feed_key (str): The key of the RSS feed to retrieve from the ArangoDB base database.
+
+        Returns:
+            dict: The RSS feed document retrieved from the ArangoDB base database.
+        """
+        return self.base_arango.db.collection("rss_feeds").get(feed_key)
+
+   
+    def get_feed(self, feed_key):
+        """Return the feed's data, re-fetching it when the stored copy is an hour old or more."""
+        stored_feed = self.get_feed_from_arango(feed_key)
+        cached_data = stored_feed["feed_data"]
+        last_fetched = datetime.fromisoformat(stored_feed['fetched_timestamp'])
+
+        age = datetime.now() - last_fetched
+        is_fresh = age < timedelta(hours=1)
+        return cached_data if is_fresh else self.update_feed(feed_key)
+
+
+    def display_feed(self):
+        """Show the selected feed: title, description and its first five entries."""
+        selected_key = st.session_state["selected_feed"]
+        if not selected_key:
+            return
+
+        feed_data = self.get_feed(selected_key)
+        feed_info = feed_data['feed']
+        st.title(feed_info.get("title", "No title"))
+        st.write(feed_info.get("description", "No description"))
+        st.write("**Recent Entries:**")
+
+        for entry in feed_data['entries'][:5]:
+            with st.expander(entry['title']):
+                if "summary" in entry:
+                    summary = entry['summary']
+                else:
+                    summary = "No summary available"
+                st.markdown(html_to_markdown(summary))
+                st.markdown(f"[Read more]({entry['link']})")
+
+
+def html_to_markdown(html):
+    """
+    Convert a small subset of HTML to Markdown-flavoured text.
+
+    <br> becomes a newline, <strong> becomes **bold**, <em> becomes *italic*
+    and <p> becomes its text followed by a blank line. All other markup is
+    dropped by the final get_text() call.
+    """
+    soup = BeautifulSoup(html, "html.parser")
+    # Applied in this order, so <strong>/<em> are already rewritten when the
+    # text of an enclosing <p> is read.
+    rewrites = (
+        ("br", lambda tag: "\n"),
+        ("strong", lambda tag: f"**{tag.text}**"),
+        ("em", lambda tag: f"*{tag.text}*"),
+        ("p", lambda tag: f"{tag.text}\n\n"),
+    )
+    for tag_name, render in rewrites:
+        for tag in soup.find_all(tag_name):
+            tag.replace_with(render(tag))
+    return soup.get_text()
+
+
+def download_icon(icon_url, save_folder="external_icons"):
+    """
+    Download an icon and return the local path it was saved to.
+
+    The file name is taken from the URL *path* (so query strings are ignored)
+    and prefixed with the host name, because most sites call their icon
+    "favicon.ico" and would otherwise overwrite each other.
+
+    Args:
+        icon_url (str): URL of the icon to download.
+        save_folder (str): Folder to save into; created if missing.
+
+    Returns:
+        str | None: Path of the saved file, or None when the download fails.
+    """
+    try:
+        os.makedirs(save_folder, exist_ok=True)
+
+        parsed = urllib.parse.urlparse(icon_url)
+        # A URL ending in "/" has an empty basename; fall back to a fixed name
+        icon_name = os.path.basename(parsed.path) or "icon"
+        if parsed.hostname:
+            icon_name = f"{parsed.hostname}_{icon_name}"
+        icon_path = os.path.join(save_folder, icon_name)
+
+        # The context manager releases the streamed connection even on early return
+        with requests.get(icon_url, stream=True, timeout=10) as response:
+            if response.status_code != 200:
+                print(f"Failed to download icon: {response.status_code}")
+                return None
+            with open(icon_path, "wb") as f:
+                for chunk in response.iter_content(1024):
+                    f.write(chunk)
+        return icon_path
+    except Exception as e:
+        print(f"Error downloading icon: {e}")
+        return None
--- a/test_highlight.py
+++ b/test_highlight.py
@ -0,0 +1,91 @@
+import asyncio
+import re
+from pdf_highlighter import Highlighter
+from _chromadb import ChromaDB
+from _llm import LLM
+import ollama
+from colorprinter.print_color import *
+from concurrent.futures import ThreadPoolExecutor
+
+# Wrap the synchronous generate method
+async def async_generate(llm, prompt):
+    """Run the blocking ``llm.generate(prompt)`` in a worker thread and return its result."""
+    # get_running_loop() is the supported way to reach the loop from inside a
+    # coroutine. Passing None uses the loop's default executor instead of
+    # creating and tearing down a thread pool on every call.
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(None, llm.generate, prompt)
+
+
+# Define the main asynchronous function to highlight the PDFs
+async def highlight_pdf(data):
+    """
+    Highlight the PDFs described by ``data`` and write the result to
+    "highlighted_combined_documents.pdf".
+
+    Uses the module-level ``highlighter``, which must exist before this
+    coroutine runs.
+    """
+    # Page numbers in ``data`` are zero-based (e.g., 0, 1, 2, ...)
+    pdf_buffer = await highlighter.highlight(data=data, zero_indexed_pages=True)
+
+    output_path = "highlighted_combined_documents.pdf"
+    with open(output_path, "wb") as output_file:
+        output_file.write(pdf_buffer.getbuffer())
+        print_green("PDF highlighting completed successfully!")
+
+
+# Initialize ChromaDB client
+chromadb = ChromaDB()
+
+# Define the query to fetch relevant text snippets and metadata from ChromaDB
+query = "How are climate researchers advocating for change in the society?"
+
+
+# Perform the query on ChromaDB
+result = chromadb.query(query, collection="sci_articles", n_results=5)
+# Use zip to combine the lists into a list of dictionaries
+results = [
+    {"id": id_, "metadata": metadata, "document": document, "distance": distance}
+    for id_, metadata, document, distance in zip(
+        result["ids"][0],
+        result["metadatas"][0],
+        result["documents"][0],
+        result["distances"][0],
+    )
+]
+
+for r in results:
+    print_rainbow(r["metadata"])
+    print_yellow(type(r["metadata"]['pages']))
+# Ask a LLM a question about the text snippets
+llm = LLM(model="small")
+documents_string = "\n\n---\n\n".join(result["documents"][0])
+answer = llm.generate(
+    f'''{query} Write your answer from the information below?\n\n"""{documents_string}"""\n\n{query}'''
+)
+print_green(answer)
+# Now you want to highlight relevant information in the PDFs to understand what the LLM is using!
+
+# Each result from ChromaDB contains the PDF filename and the pages where the text is found
+data = []
+for hit in results:  # not named "result": that would shadow the raw query result above
+    raw_pages = hit["metadata"].get("pages")
+    try:
+        pages = [int(raw_pages)]
+    except (TypeError, ValueError):
+        # Not a single number: extract every page number from e.g. "3, 4, 7".
+        # A missing value yields an empty list instead of crashing re.findall.
+        pages = [int(number) for number in re.findall(r"\d+", str(raw_pages or ""))]
+
+    data.append(
+        {
+            "user_input": query,
+            "pdf_filename": hit["metadata"]["_id"],
+            "pages": pages,
+            'chunk': hit['document']
+        }
+    )
+
+# Initialize the Highlighter
+highlighter = Highlighter(
+    llm=llm,  # Pass the LLM to the Highlighter
+    comment=False,  # Comments are turned off here; presumably True adds explanatory comments — confirm in pdf_highlighter
+    use_llm=False
+)
+
+
+
+# Run the main function using asyncio
+asyncio.run(highlight_pdf(data))
--- a/test_ollama_client.py
+++ b/test_ollama_client.py
@ -0,0 +1,32 @@
+import os
+import base64
+from ollama import Client
+import env_manager
+from colorprinter.print_color import *
+env_manager.set_env()
+
+# Encode the credentials
+credentials = f"{os.getenv('LLM_API_USER')}:{os.getenv('LLM_API_PWD_LASSE')}"
+encoded_credentials = base64.b64encode(credentials.encode()).decode()
+
+# Set up the headers with authentication details
+headers = {
+    'Authorization': f'Basic {encoded_credentials}'
+}
+
+# Get the host URL (base URL only).
+# str.rstrip() takes a set of characters, not a suffix: rstrip('/api/chat/') also eats
+# trailing letters of the host or path (".../ollama/api/chat" -> ".../ollam").
+# Remove the exact suffix instead, and tolerate an unset variable.
+host_url = (os.getenv("LLM_API_URL") or "").rstrip("/")
+if host_url.endswith("/api/chat"):
+    host_url = host_url[: -len("/api/chat")]
+
+
+# Initialize the client with the host and headers
+client = Client(
+    host=host_url,
+    headers=headers
+)
+
+# Example usage of the client
+try:
+    response = client.chat(model=os.getenv('LLM_MODEL') , messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
+    print_rainbow(response)
+except Exception as e:
+    print(f"Error: {e}")