parent
e4a5d56d8d
commit
3a3c226baa
2 changed files with 113 additions and 0 deletions
@ -0,0 +1,78 @@ |
|||||||
|
import asyncio |
||||||
|
from highlight_pdf import Highlighter |
||||||
|
import chromadb |
||||||
|
import ollama |
||||||
|
|
||||||
|
# Initialize ChromaDB client |
||||||
|
client = chromadb.Client() |
||||||
|
|
||||||
|
# Define the query to fetch relevant text snippets and metadata from ChromaDB |
||||||
|
query = "What is said about climate?" |
||||||
|
model = "llama3.1" |
||||||
|
|
||||||
|
# Perform the query on ChromaDB |
||||||
|
results = client.query(query) |
||||||
|
|
||||||
|
# Results might look like this: |
||||||
|
# results = [ |
||||||
|
# { |
||||||
|
# "metadatas": [[ |
||||||
|
# { |
||||||
|
# "pdf_filename": "example_pdf_document.pdf", |
||||||
|
# "pages": [1] |
||||||
|
# }]], |
||||||
|
# "documents": [["<Text extracted from the PDF page>"]], |
||||||
|
# "ids": ["<ID of the document>"] |
||||||
|
# }, |
||||||
|
# { |
||||||
|
# "metadatas": [[ |
||||||
|
# { |
||||||
|
# "pdf_filename": "another_pdf_document.pdf", |
||||||
|
# "pages": [2, 3] |
||||||
|
# }]], |
||||||
|
# "documents": [["<Another text extracted from the PDF pages>"]], |
||||||
|
# "ids": ["<ID of another document>"] |
||||||
|
# } |
||||||
|
# ] |
||||||
|
|
||||||
|
# Ask a LLM a question about the text snippets |
||||||
|
documents_string = "\n".join(results[0]["documents"]) |
||||||
|
answer = ollama.chat( |
||||||
|
query=f"{query}\Only use information from the texts below when answering the question!\n\nTexts:\n{documents_string}", |
||||||
|
model=model, |
||||||
|
options={"temperature": 0}, |
||||||
|
)["message"]["content"] |
||||||
|
|
||||||
|
# Now you want to highlight relevant information in the PDFs to understand what the LLM is using! |
||||||
|
|
||||||
|
# Each result from ChromaDB contains the PDF filename and the pages where the text is found |
||||||
|
data = [ |
||||||
|
{ |
||||||
|
"user_input": query, |
||||||
|
"pdf_filename": result["metadatas"][0]["pdf_filename"], |
||||||
|
"pages": result["metadatas"][0].get("pages"), |
||||||
|
} |
||||||
|
for result in results |
||||||
|
] |
||||||
|
|
||||||
|
# Initialize the Highlighter |
||||||
|
highlighter = Highlighter( |
||||||
|
model="llama3.1", |
||||||
|
comment=True, # Enable comments to understand the context |
||||||
|
) |
||||||
|
|
||||||
|
|
||||||
|
# Define the main asynchronous function to highlight the PDFs |
||||||
|
async def highlight_pdf(): |
||||||
|
# Use the highlight method to highlight the relevant sentences in the PDFs |
||||||
|
highlighted_pdf_buffer = await highlighter.highlight( |
||||||
|
data=data, zero_indexed_pages=True # Pages are zero-based (e.g., 0, 1, 2, ...) |
||||||
|
) |
||||||
|
|
||||||
|
# Save the highlighted PDF to a new file |
||||||
|
with open("highlighted_combined_documents.pdf", "wb") as f: |
||||||
|
f.write(highlighted_pdf_buffer.getbuffer()) |
||||||
|
|
||||||
|
|
||||||
|
# Run the main function using asyncio |
||||||
|
asyncio.run(highlight_pdf()) |
||||||
@ -0,0 +1,35 @@ |
|||||||
|
import asyncio |
||||||
|
import io |
||||||
|
from highlight_pdf import Highlighter |
||||||
|
|
||||||
|
# User input/question |
||||||
|
user_input = "What are the main findings?" |
||||||
|
|
||||||
|
# Answer received from LLM based on text in a PDF |
||||||
|
llm_answer = "The main findings are that the treatment was effective in 70% of cases." |
||||||
|
|
||||||
|
# PDF filename |
||||||
|
pdf_filename = "example_pdf_document.pdf" |
||||||
|
|
||||||
|
# Pages to consider (optional, can be None) |
||||||
|
pages = [1, 2] |
||||||
|
|
||||||
|
# Initialize the Highlighter |
||||||
|
highlighter = Highlighter( |
||||||
|
model='llama3.1', |
||||||
|
comment=True # Enable comments to understand the context |
||||||
|
) |
||||||
|
|
||||||
|
# Define the main asynchronous function to highlight the PDF |
||||||
|
async def main(): |
||||||
|
highlighted_pdf_buffer = await highlighter.highlight( |
||||||
|
user_input=user_input, |
||||||
|
data=[{"text": llm_answer, "pdf_filename": pdf_filename, "pages": pages}] |
||||||
|
) |
||||||
|
|
||||||
|
# Save the highlighted PDF to a new file |
||||||
|
with open("highlighted_example_pdf_document.pdf", "wb") as f: |
||||||
|
f.write(highlighted_pdf_buffer.getbuffer()) |
||||||
|
|
||||||
|
# Run the main function using asyncio |
||||||
|
asyncio.run(main()) |
||||||
Loading…
Reference in new issue