From 6b1f99de183d07d15499d0c73e1e59323c5bd023 Mon Sep 17 00:00:00 2001
From: lasseedfast <lasse.edfast@gmail.com>
Date: Mon, 7 Oct 2024 16:26:29 +0200
Subject: [PATCH] first commit

---
 .gitignore               |   3 +
 example_streamlit_app.py |  51 +++++
 highlight_pdf.py         | 448 +++++++++++++++++++++++++++++++++++++++
 readme.md                | 176 +++++++++++++++
 4 files changed, 678 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 example_streamlit_app.py
 create mode 100644 highlight_pdf.py
 create mode 100644 readme.md
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..24a4d67
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/.env
+/.venv
+/__pycache__
\ No newline at end of file
diff --git a/example_streamlit_app.py b/example_streamlit_app.py
new file mode 100644
index 0000000..ffbd8b6
--- /dev/null
+++ b/example_streamlit_app.py
@@ -0,0 +1,51 @@
+import streamlit as st
+from highlight_pdf import Highlighter
+import asyncio
+import io
+import base64
+
+async def highlight_pdf(user_input, pdf_file, make_comments):
+    highlighter = Highlighter(comment=make_comments)
+    pdf_buffer = io.BytesIO(pdf_file.read())
+    highlighted_pdf_buffer = await highlighter.highlight(user_input, pdf_buffer=pdf_buffer)
+    return highlighted_pdf_buffer
+
+def main():
+
+    with st.sidebar:
+        st.write('This is a demo of a PDF highlighter tool that highlights relevant sentences in a PDF document based on user input.')
+    st.title("PDF Highlighter Demo")
+
+    user_input = st.text_input("Enter your question or input text:")
+    pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])
+    make_comments = st.checkbox("Make comments to the highlighted text (takes a bit longer)")
+
+    if st.button("Highlight PDF"):
+        if user_input and pdf_file:
+            with st.spinner("Processing..."):
+                highlighted_pdf_buffer = asyncio.run(highlight_pdf(user_input, pdf_file, make_comments))
+                if highlighted_pdf_buffer:
+                    # Encode the PDF buffer to base64
+                    base64_pdf = base64.b64encode(highlighted_pdf_buffer.getvalue()).decode('utf-8')
+
+                    # Embed PDF in HTML
+                    pdf_display = F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="300" height="700" type="application/pdf"></iframe>'
+
+                    with st.sidebar:
+                        # Display file
+                        st.markdown("_Preview of highlighted PDF:_")
+                        st.markdown(pdf_display, unsafe_allow_html=True)
+                
+                    st.download_button(
+                        label="Download Highlighted PDF",
+                        data=highlighted_pdf_buffer,
+                        file_name="highlighted_document.pdf",
+                        mime="application/pdf"
+                    )
+                else:
+                    st.error("No relevant sentences found to highlight.")
+        else:
+            st.error("Please provide both user input and a PDF file.")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/highlight_pdf.py b/highlight_pdf.py
new file mode 100644
index 0000000..533550a
--- /dev/null
+++ b/highlight_pdf.py
@@ -0,0 +1,448 @@
+import re
+import warnings
+import pymupdf
+import nltk
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import linear_kernel
+import io
+import dotenv
+import os
+import asyncio
+import aiofiles
+
+# Check if 'punkt_tab' tokenizer data is available
+try:
+    nltk.data.find("tokenizers/punkt_tab")
+except LookupError:
+    import logging
+
+    logging.info("Downloading 'punkt_tab' tokenizer data for NLTK.")
+    nltk.download("punkt_tab")
+
+
+CUSTOM_SYSTEM_PROMPT = """
+You're helping a journalist with research by choosing what sentences should be highlighted in a text. 
+Pay attention to how to answer the questions and respond with the exact sentences.
+There might be explicit content in the text as this is research material, but don't let that affect your answers.
+"""
+
+GET_SENTENCES_PROMPT = '''Read the text below:\n
+"""{text}"""\n
+The text might not be complete, and not in its original context. Try to understand the text and give an answer from the text.\n
+A researcher wants to get an answer to the question "{user_input}". What sentences should be highlighted? Answer ONLY with the exact sentences.
+'''
+
+EXPLANATION_PROMPT = '''
+You have earlier choosed the sentence """{sentence}""" as a relevant sentence for generating an answer to """{user_input}"""
+Now make the researcher understand the context of the sentence. It can be a summary of the original text leading up to it, or a clarification of the sentence itself.
+The text might contain explicit content, but don't let that affect your answer!
+Your answer will be used as a comment to a highlighted sentence in a PDF. Don't refer to yourself, only the text! Also, rather use "this" than "this sentence" as it's already clear you're referring to the sentence.
+'''
+
+
+class LLM:
+    """
+    LLM class for interacting with language models from OpenAI or Ollama.
+
+    Attributes:
+        model (str): The model to be used for generating responses.
+        temperature (float): The temperature setting for the model's response generation.
+        num_ctx (int): The number of context tokens to be used.
+        keep_alive (int): The keep-alive duration for the connection.
+        options (dict): Options for the model's response generation.
+        memory (bool): Whether to retain conversation history.
+        messages (list): List of messages in the conversation.
+        openai (bool): Flag indicating if OpenAI is being used.
+        ollama (bool): Flag indicating if Ollama is being used.
+        client (object): The client object for OpenAI.
+        llm (object): The client object for the language model.
+
+    Methods:
+        __init__(openai_key=False, model=None, temperature=0, system_prompt=None, num_ctx=None, memory=True, keep_alive=3600):
+            Initializes the LLM class with the provided parameters.
+        use_openai(key, model):
+            Configures the class to use OpenAI for generating responses.
+        use_ollama(model):
+            Configures the class to use Ollama for generating responses.
+        generate(prompt):
+            Asynchronously generates a response based on the provided prompt.
+    """
+
+    def __init__(
+        self,
+        openai_key=False,
+        model=None,
+        temperature=0,
+        system_prompt=None,
+        num_ctx=None,
+        memory=True,
+        keep_alive=3600,
+    ):
+        """
+        Initialize the highlight_pdf class.
+
+        Parameters:
+        openai_key (str or bool): API key for OpenAI. If False, Ollama will be used.
+        model (str, optional): The model to be used. Defaults to None.
+        temperature (float, optional): Sampling temperature for the model. Defaults to 0.
+        system_prompt (str, optional): Initial system prompt for the model. Defaults to None.
+        num_ctx (int, optional): Number of context tokens. Defaults to None.
+        memory (bool, optional): Whether to use memory. Defaults to True.
+        keep_alive (int, optional): Keep-alive duration in seconds. Defaults to 3600.
+        """
+        dotenv.load_dotenv()
+        if model:
+            self.model = model
+        else:
+            self.model = os.getenv("LLM_MODEL")
+        self.temperature = temperature
+        self.num_ctx = num_ctx
+        self.keep_alive = keep_alive
+        self.options = {"temperature": self.temperature}
+        self.memory = memory
+        if self.num_ctx:
+            self.options["num_ctx"] = self.num_ctx
+        if system_prompt:
+            self.messages = [{"role": "system", "content": system_prompt}]
+        else:
+            self.messages = [{"role": "system", "content": CUSTOM_SYSTEM_PROMPT}]
+
+        if openai_key:  # For use with OpenAI
+            self.use_openai(openai_key, model)
+        else:  # For use with Ollama
+            self.use_ollama(model)
+
+    def use_openai(self, key, model):
+        """
+        Configures the instance to use OpenAI's API for language model operations.
+
+        Args:
+            key (str): The API key for authenticating with OpenAI.
+            model (str): The specific model to use. If not provided, it will default to the value of the "OPENAI_MODEL" environment variable.
+
+        Attributes:
+            llm (module): The OpenAI module.
+            client (openai.AsyncOpenAI): The OpenAI client initialized with the provided API key.
+            openai (bool): Flag indicating that OpenAI is being used.
+            ollama (bool): Flag indicating that Ollama is not being used.
+            model (str): The model to be used for OpenAI operations.
+        """
+        import openai
+
+        self.llm = openai
+        self.client = openai.AsyncOpenAI(api_key=key)
+        self.openai = True
+        self.ollama = False
+        if model:
+            self.model = model
+        else:
+            self.model = os.getenv("OPENAI_MODEL")
+
+    def use_ollama(self, model):
+        """
+        Configures the instance to use the Ollama LLM (Language Learning Model) service.
+
+        This method initializes an asynchronous Ollama client and sets the appropriate flags
+        to indicate that Ollama is being used instead of OpenAI. It also sets the model to be
+        used for the LLM, either from the provided argument or from an environment variable.
+
+        Args:
+            model (str): The name of the model to be used. If not provided, the model name
+                         will be fetched from the environment variable 'LLM_MODEL'.
+        """
+        import ollama
+
+        self.llm = ollama.AsyncClient()
+        self.ollama = True
+        self.openai = False
+        if model:
+            self.model = model
+        else:
+            self.model = os.getenv("LLM_MODEL")
+
+    async def generate(self, prompt):
+        """
+        Generates a response based on the provided prompt using either OpenAI or Ollama.
+
+        Args:
+            prompt (str): The input prompt to generate a response for.
+
+        Returns:
+            str: The generated response.
+
+        Notes:
+            - The prompt is stripped of leading whitespace on each line.
+        """
+        prompt = re.sub(r"^\s+", "", prompt, flags=re.MULTILINE)
+        self.messages.append({"role": "user", "content": prompt})
+        if self.openai:
+            chat_completion = await self.client.chat.completions.create(
+                messages=self.messages, model=self.model, temperature=0
+            )
+            answer = chat_completion.choices[0].message.content
+            return answer
+        elif self.ollama:
+            response = await self.llm.chat(
+                messages=self.messages,
+                model=self.model,
+                options=self.options,
+                keep_alive=self.keep_alive,
+            )
+            answer = response["message"]["content"]
+
+        self.messages.append({"role": "assistant", "content": answer})
+        if not self.memory:
+            self.messages = self.messages[0]
+        return answer
+
+
+class Highlighter:
+    """
+    Highlighter class for annotating and highlighting sentences in PDF documents using an LLM (Large Language Model).
+    Attributes:
+        silent (bool): Flag to suppress warnings.
+        comment (bool): Flag to add comments to highlighted sentences.
+        llm_params (dict): Parameters for the LLM.
+    Methods:
+        __init__(self, silent=False, openai_key=None, comments=False, llm_model=None, llm_temperature=0, llm_system_prompt=None, llm_num_ctx=None, llm_memory=True, llm_keep_alive=3600):
+            Initializes the Highlighter class with the given parameters.
+        async highlight(self, user_input, docs=None, data=None, pdf_filename=None):
+            Highlights sentences in the provided PDF documents based on the user input.
+        async get_sentences_with_llm(self, text, user_input):
+            Uses the LLM to generate sentences from the text that should be highlighted based on the user input.
+        async annotate_pdf(self, user_input: str, filename: str, pages: list = None, extend_pages: bool = False):
+            Annotates the PDF with highlighted sentences and optional comments.
+            Fixes the filename by replacing special characters with their ASCII equivalents.
+    """
+
+    def __init__(
+        self,
+        silent=False,
+        openai_key=None,
+        comment=False,
+        llm_model=None,
+        llm_temperature=0,
+        llm_system_prompt=None,
+        llm_num_ctx=None,
+        llm_memory=True,
+        llm_keep_alive=3600,
+    ):
+        """
+        Initialize the class with the given parameters.
+
+        Parameters:
+        silent (bool): Flag to suppress output.
+        openai_key (str or None): API key for OpenAI.
+        comment (bool): Flag to enable or disable comments.
+        llm_model (str or None): The model name for the language model.
+        llm_temperature (float): The temperature setting for the language model.
+        llm_system_prompt (str or None): The system prompt for the language model.
+        llm_num_ctx (int or None): The number of context tokens for the language model.
+        llm_memory (bool): Flag to enable or disable memory for the language model.
+        llm_keep_alive (int): The keep-alive duration for the language model in seconds.
+        """
+        self.silent = silent
+        self.comment = comment
+        self.llm_params = {
+            "openai_key": openai_key,
+            "model": llm_model,
+            "temperature": llm_temperature,
+            "system_prompt": llm_system_prompt,
+            "num_ctx": llm_num_ctx,
+            "memory": llm_memory,
+            "keep_alive": llm_keep_alive,
+        }
+
+    async def highlight(
+        self,
+        user_input,
+        docs=None,
+        data=None,
+        pdf_filename=None,
+    ):
+        """
+        Highlights text in one or more PDF documents based on user input.
+        Args:
+            user_input (str): The text input from the user to highlight in the PDFs.
+            docs (list, optional): A list of PDF filenames to process. Defaults to None.
+            data (dict, optional): Data in JSON format to process. Should be on the format: {"pdf_filename": "filename", "pages": [1, 2, 3]}. Defaults to None.
+            pdf_filename (str, optional): A single PDF filename to process. Defaults to None.
+        Returns:
+            io.BytesIO: A buffer containing the combined PDF with highlights.
+        Raises:
+            AssertionError: If none of `data`, `pdf_filename`, or `docs` are provided.
+        """
+        pdf_buffers = []
+        assert any(
+            [data, pdf_filename, docs]
+        ), "You need to provide either a PDF filename, a list of filenames or data in JSON format."
+
+        if data:
+            docs = [item['pdf_filename'] for item in data]
+
+        if not docs:
+            docs = [pdf_filename]
+
+        tasks = [self.annotate_pdf(user_input, doc, pages=item.get('pages')) for doc, item in zip(docs, data or [{}]*len(docs))]
+        pdf_buffers = await asyncio.gather(*tasks)
+
+        combined_pdf = pymupdf.open()
+        new_toc = []
+
+        for buffer in pdf_buffers:
+            if not buffer:
+                continue
+            pdf = pymupdf.open(stream=buffer, filetype="pdf")
+            length = len(combined_pdf)
+            combined_pdf.insert_pdf(pdf)
+            new_toc.append([1, f"Document {length + 1}", length + 1])
+
+        combined_pdf.set_toc(new_toc)
+        pdf_buffer = io.BytesIO()
+        combined_pdf.save(pdf_buffer)
+        pdf_buffer.seek(0)
+
+        return pdf_buffer
+
+    async def get_sentences_with_llm(self, text, user_input):
+        prompt = GET_SENTENCES_PROMPT.format(text=text, user_input=user_input)
+
+        answer = await self.llm.generate(prompt)
+        return answer.split("\n")
+
+    async def annotate_pdf(
+        self,
+        user_input: str,
+        filename: str,
+        pages: list = None,
+        extend_pages: bool = False,
+    ):
+        self.llm = LLM(**self.llm_params)
+
+        pdf = pymupdf.open(filename)
+        output_pdf = pymupdf.open()
+        vectorizer = TfidfVectorizer()
+
+        if pages is not None:
+            new_pdf = pymupdf.open()
+            pdf_pages = pdf.pages(pages[0], pages[-1] + 1)
+            pdf_text = ""
+            for page in pdf_pages:
+                pdf_text += f'\n{page.get_text("text")}'
+                new_pdf.insert_pdf(pdf, from_page=page.number, to_page=page.number)
+        else:
+            pdf_text = "\n".join([page.get_text("text") for page in pdf])
+            new_pdf = pymupdf.open()
+            new_pdf.insert_pdf(pdf)
+
+        pdf_sentences = nltk.sent_tokenize(pdf_text)
+        tfidf_text = vectorizer.fit_transform(pdf_sentences)
+        sentences = await self.get_sentences_with_llm(pdf_text, user_input)
+        highlight_sentences = []
+        for sentence in sentences:
+            if sentence == "None" or len(sentence) < 5:
+                continue
+
+            sentence = sentence.replace('"', "").strip()
+            if sentence in pdf_text:
+                highlight_sentences.append(sentence)
+            else:
+                tfidf_sentence = vectorizer.transform([sentence])
+                cosine_similarities = linear_kernel(
+                    tfidf_sentence, tfidf_text
+                ).flatten()
+                most_similar_index = cosine_similarities.argmax()
+                most_similar_sentence = pdf_sentences[most_similar_index]
+                highlight_sentences.append(most_similar_sentence)
+
+        relevant_pages = set()
+
+        for sentence in highlight_sentences:
+            found = False
+            if self.comment:
+                explanation = await self.llm.generate(
+                    EXPLANATION_PROMPT.format(sentence=sentence, user_input=user_input)
+                )
+            for page in new_pdf:
+                rects = page.search_for(sentence)
+                if not rects:
+                    continue
+                found = True
+                p1 = rects[0].tl
+                p2 = rects[-1].br
+                highlight = page.add_highlight_annot(start=p1, stop=p2)
+                if self.comment:
+                    highlight.set_info(content=explanation)
+                relevant_pages.add(page.number)
+                new_pdf.reload_page(page)
+
+            if not found and not self.silent:
+                warnings.warn(f"Sentence not found: {sentence}", category=UserWarning)
+
+        extended_pages = []
+        if extend_pages:
+            for p in relevant_pages:
+                extended_pages.append(p)
+                if p - 1 not in extended_pages and p - 1 != -1:
+                    extended_pages.append(p - 1)
+                if p + 1 not in extended_pages:
+                    extended_pages.append(p + 1)
+            relevant_pages = extended_pages
+        for p in relevant_pages:
+            output_pdf.insert_pdf(new_pdf, from_page=p, to_page=p)
+
+        if len(output_pdf) != 0:
+            buffer = io.BytesIO()
+            new_pdf.save(buffer)
+            buffer.seek(0)
+            return buffer
+        else:
+            if not self.silent:
+                warnings.warn("No relevant sentences found", category=UserWarning)
+            return None
+
+
+async def save_pdf_to_file(pdf_buffer, filename):
+    async with aiofiles.open(filename, "wb") as f:
+        await f.write(pdf_buffer.getbuffer())
+
+
+if __name__ == "__main__":
+    import argparse
+    import json
+
+    # Set up argument parser for command-line interface
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--user_input", type=str, help="The user input")
+    parser.add_argument("--pdf_filename", type=str, help="The PDF filename")
+    parser.add_argument("--silent", action="store_true", help="No user warnings")
+    parser.add_argument("--openai_key", type=str, help="OpenAI API key")
+    parser.add_argument("--comment", action="store_true", help="Include comments")
+    parser.add_argument(
+        "--data",
+        type=json.loads,
+        help="The data in JSON format (fields: user_input, pdf_filename, list_of_pages)",
+    )
+    args = parser.parse_args()
+
+    # Initialize the Highlighter class with the provided arguments
+    highlighter = Highlighter(
+        silent=args.silent,
+        openai_key=args.openai_key,
+        comment=args.comment,
+    )
+
+    # Define the main asynchronous function to highlight the PDF
+    async def main():
+        highlighted_pdf = await highlighter.highlight(
+            user_input=args.user_input,
+            pdf_filename=args.pdf_filename,
+            data=args.data,
+        )
+        # Save the highlighted PDF to a new file
+        await save_pdf_to_file(
+            highlighted_pdf, args.pdf_filename.replace(".pdf", "_highlighted.pdf")
+        )
+
+    # Run the main function using asyncio
+    asyncio.run(main())
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..461c76e
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,176 @@
+# PDF Highlighter
+
+This project offers a tool for highlighting and annotating sentences in PDF documents using a Large Language Model (LLM). It is designed to help users identify and emphasize relevant sentences in their documents.
+
+## Use cases
+
+- **Finding Relevant Information**:
+   - Highlight specific sentences in a PDF that are relevant to a user's question or input. For example, if a user asks, "What are the main findings?", the tool will highlight sentences in the PDF that answer this question.
+
+- **Reviewing LLM-Generated Answers**:
+   - If a user has received an answer from an LLM based on information in a PDF, they can use this tool to highlight the exact text in the PDF that supports the LLM's answer. This helps in verifying and understanding the context of the LLM's response.
+
+## Features
+
+- Highlight sentences in PDF documents based on user input.
+- Optionally add comments to highlighted sentences.
+- Supports both OpenAI and Ollama language models.
+- Combine multiple PDFs into a single document with highlights and comments.
+
+## Requirements
+
+- Python 3.7+ (tested with 3.10.13)
+- Required Python packages (see `requirements.txt`)
+
+## Installation
+
+1. Clone the repository:
+    ```sh
+    git clone https://github.com/lasseedfast/pdf-highlighter.git
+    cd pdf-highlighter
+    ```
+
+2. Create a virtual environment and activate it:
+    ```sh
+    python -m venv venv
+    source venv/bin/activate
+    ```
+
+3. Install the required packages:
+    ```sh
+    pip install -r requirements.txt
+    ```
+
+4. Set up environment variables:
+    - Create a `.env` file in the root directory.
+    - Add your OpenAI API key and LLM model details:
+        ```
+        OPENAI_API_KEY=your_openai_api_key
+        LLM_MODEL=your_llm_model
+        ```
+
+## Usage
+
+### Command-Line Interface
+
+You can use the command-line interface to highlight sentences in a PDF document.
+
+```sh
+python highlight_pdf.py --user_input "Your question or input text" --pdf_filename "path/to/your/document.pdf" --openai_key "your_openai_api_key" --comment
+```
+
+#### Arguments
+
+- `--user_input`: The text input from the user to highlight in the PDFs.
+- `--pdf_filename`: The PDF filename to process.
+- `--silent`: Suppress warnings (optional).
+- `--openai_key`: OpenAI API key (optional if set in `.env`).
+- `--comment`: Include comments in the highlighted PDF (optional).
+- `--data`: Data in JSON format (fields: text, pdf_filename, pages) (optional).
+
+#### Example
+
+```sh
+python highlight_pdf.py --user_input "What are the main findings?" --pdf_filename "research_paper.pdf" --openai_key "sk-..." --comment
+```
+
+### Note on Long PDFs
+
+If the PDF is long, the result will be better if the user provides the data containing filename, user_input, and pages. This helps the tool focus on specific parts of the document, improving the accuracy and relevance of the highlights.
+
+#### Example with Data
+
+```sh
+python highlight_pdf.py --data '[{"text": "Some text to highlight", "pdf_filename": "example.pdf", "pages": [1, 2, 3]}]'
+```
+
+#### Output
+
+The highlighted PDF will be saved with `_highlighted` appended to the original filename.
+
+### Use in Python Code
+
+Here's a short Python code example demonstrating how to use the highlight tool to understand what exact text in the PDF is relevant for the original user input/question. This example assumes that the user has previously received an answer from an LLM based on text in a PDF.
+
+```python
+import asyncio
+import io
+from highlight_pdf import Highlighter
+
+# User input/question
+user_input = "What are the main findings?"
+
+# Answer received from LLM based on text in a PDF
+llm_answer = "The main findings are that the treatment was effective in 70% of cases."
+
+# PDF filename
+pdf_filename = "research_paper.pdf"
+
+# Pages to consider (optional, can be None)
+pages = [1, 2, 3]
+
+# Initialize the Highlighter
+highlighter = Highlighter(
+    openai_key="your_openai_api_key",
+    comment=True  # Enable comments to understand the context
+)
+
+# Define the main asynchronous function to highlight the PDF
+async def main():
+    highlighted_pdf_buffer = await highlighter.highlight(
+        user_input=user_input,
+        data=[{"text": llm_answer, "pdf_filename": pdf_filename, "pages": pages}]
+    )
+    
+    # Save the highlighted PDF to a new file
+    with open("highlighted_research_paper.pdf", "wb") as f:
+        f.write(highlighted_pdf_buffer.getbuffer())
+
+# Run the main function using asyncio
+asyncio.run(main())
+```
+
+## Streamlit Example
+
+A Streamlit example is provided in `example_streamlit_app.py` to demonstrate how to use the PDF highlighter tool in a web application.
+
+### Running the Streamlit App
+
+1. Ensure you have installed the required packages and set up the environment variables as described in the Installation section.
+2. Run the Streamlit app:
+    ```sh
+    streamlit run example_streamlit_app.py
+    ```
+
+#### Streamlit App Features
+
+- Enter your question or input text.
+- Upload a PDF file.
+- Optionally, choose to add comments to the highlighted text.
+- Click the "Highlight PDF" button to process the PDF.
+- Preview the highlighted PDF in the sidebar.
+- Download the highlighted PDF.
+
+## API
+
+### Highlighter Class
+
+#### Methods
+
+- `__init__(self, silent=False, openai_key=None, comment=False, llm_model=None, llm_temperature=0, llm_system_prompt=None, llm_num_ctx=None, llm_memory=True, llm_keep_alive=3600)`: Initializes the Highlighter class with the given parameters.
+- `async highlight(self, user_input, docs=None, data=None, pdf_filename=None)`: Highlights sentences in the provided PDF documents based on the user input.
+- `async get_sentences_with_llm(self, text, user_input)`: Uses the LLM to generate sentences from the text that should be highlighted based on the user input.
+- `async annotate_pdf(self, user_input: str, filename: str, pages: list = None, extend_pages: bool = False)`: Annotates the PDF with highlighted sentences and optional comments.
+
+### LLM Class
+
+#### Methods
+
+- `__init__(self, openai_key=False, model=None, temperature=0, system_prompt=None, num_ctx=None, memory=True, keep_alive=3600)`: Initializes the LLM class with the provided parameters.
+- `use_openai(self, key, model)`: Configures the class to use OpenAI for generating responses.
+- `use_ollama(self, model)`: Configures the class to use Ollama for generating responses.
+- `async generate(self, prompt)`: Asynchronously generates a response based on the provided prompt.
+
+## Contributing
+
+Contributions are welcome! Please open an issue or submit a pull request for any improvements or bug fixes.
\ No newline at end of file