Add examples

2 years ago · 3a3c226baa
parent e4a5d56d8d
commit 3a3c226baa
2 changed files with 113 additions and 0 deletions
--- a/examples/data_from_chromadb.py
+++ b/examples/data_from_chromadb.py
@ -0,0 +1,78 @@
+import asyncio
+from highlight_pdf import Highlighter
+import chromadb
+import ollama
+
+# Initialize ChromaDB client
+client = chromadb.Client()
+
+# Define the query to fetch relevant text snippets and metadata from ChromaDB
+query = "What is said about climate?"
+model = "llama3.1"
+
+# Perform the query on ChromaDB
+results = client.query(query)
+
+# Results might look like this:
+# results = [
+#     {
+#         "metadatas": [[
+#             {
+#                 "pdf_filename": "example_pdf_document.pdf",
+#                 "pages": [1]
+#             }]],
+#         "documents": [["<Text extracted from the PDF page>"]],
+#         "ids": ["<ID of the document>"]
+#     },
+#     {
+#         "metadatas": [[
+#             {
+#                 "pdf_filename": "another_pdf_document.pdf",
+#                 "pages": [2, 3]
+#             }]],
+#         "documents": [["<Another text extracted from the PDF pages>"]],
+#         "ids": ["<ID of another document>"]
+#     }
+# ]
+
+# Ask a LLM a question about the text snippets
+documents_string = "\n".join(results[0]["documents"])
+answer = ollama.chat(
+    query=f"{query}\Only use information from the texts below when answering the question!\n\nTexts:\n{documents_string}",
+    model=model,
+    options={"temperature": 0},
+)["message"]["content"]
+
+# Now you want to highlight relevant information in the PDFs to understand what the LLM is using!
+
+# Each result from ChromaDB contains the PDF filename and the pages where the text is found
+data = [
+    {
+        "user_input": query,
+        "pdf_filename": result["metadatas"][0]["pdf_filename"],
+        "pages": result["metadatas"][0].get("pages"),
+    }
+    for result in results
+]
+
+# Initialize the Highlighter
+highlighter = Highlighter(
+    model="llama3.1",
+    comment=True,  # Enable comments to understand the context
+)
+
+
+# Define the main asynchronous function to highlight the PDFs
+async def highlight_pdf():
+    # Use the highlight method to highlight the relevant sentences in the PDFs
+    highlighted_pdf_buffer = await highlighter.highlight(
+        data=data, zero_indexed_pages=True  # Pages are zero-based (e.g., 0, 1, 2, ...)
+    )
+
+    # Save the highlighted PDF to a new file
+    with open("highlighted_combined_documents.pdf", "wb") as f:
+        f.write(highlighted_pdf_buffer.getbuffer())
+
+
+# Run the main function using asyncio
+asyncio.run(highlight_pdf())
--- a/examples/single_pdf.py
+++ b/examples/single_pdf.py
@ -0,0 +1,35 @@
+import asyncio
+import io
+from highlight_pdf import Highlighter
+
+# User input/question
+user_input = "What are the main findings?"
+
+# Answer received from LLM based on text in a PDF
+llm_answer = "The main findings are that the treatment was effective in 70% of cases."
+
+# PDF filename
+pdf_filename = "example_pdf_document.pdf"
+
+# Pages to consider (optional, can be None)
+pages = [1, 2]
+
+# Initialize the Highlighter
+highlighter = Highlighter(
+    model='llama3.1',
+    comment=True  # Enable comments to understand the context
+)
+
+# Define the main asynchronous function to highlight the PDF
+async def main():
+    highlighted_pdf_buffer = await highlighter.highlight(
+        user_input=user_input,
+        data=[{"text": llm_answer, "pdf_filename": pdf_filename, "pages": pages}]
+    )
+    
+    # Save the highlighted PDF to a new file
+    with open("highlighted_example_pdf_document.pdf", "wb") as f:
+        f.write(highlighted_pdf_buffer.getbuffer())
+
+# Run the main function using asyncio
+asyncio.run(main())