Add examples
This commit is contained in:
parent
e4a5d56d8d
commit
3a3c226baa
78
examples/data_from_chromadb.py
Normal file
78
examples/data_from_chromadb.py
Normal file
@ -0,0 +1,78 @@
|
||||
import asyncio
|
||||
from highlight_pdf import Highlighter
|
||||
import chromadb
|
||||
import ollama
|
||||
|
||||
# Initialize ChromaDB client
|
||||
client = chromadb.Client()
|
||||
|
||||
# Define the query to fetch relevant text snippets and metadata from ChromaDB
|
||||
query = "What is said about climate?"
|
||||
model = "llama3.1"
|
||||
|
||||
# Perform the query on ChromaDB
|
||||
results = client.query(query)
|
||||
|
||||
# Results might look like this:
|
||||
# results = [
|
||||
# {
|
||||
# "metadatas": [[
|
||||
# {
|
||||
# "pdf_filename": "example_pdf_document.pdf",
|
||||
# "pages": [1]
|
||||
# }]],
|
||||
# "documents": [["<Text extracted from the PDF page>"]],
|
||||
# "ids": ["<ID of the document>"]
|
||||
# },
|
||||
# {
|
||||
# "metadatas": [[
|
||||
# {
|
||||
# "pdf_filename": "another_pdf_document.pdf",
|
||||
# "pages": [2, 3]
|
||||
# }]],
|
||||
# "documents": [["<Another text extracted from the PDF pages>"]],
|
||||
# "ids": ["<ID of another document>"]
|
||||
# }
|
||||
# ]
|
||||
|
||||
# Ask a LLM a question about the text snippets
|
||||
documents_string = "\n".join(results[0]["documents"])
|
||||
answer = ollama.chat(
|
||||
query=f"{query}\Only use information from the texts below when answering the question!\n\nTexts:\n{documents_string}",
|
||||
model=model,
|
||||
options={"temperature": 0},
|
||||
)["message"]["content"]
|
||||
|
||||
# Now you want to highlight relevant information in the PDFs to understand what the LLM is using!
|
||||
|
||||
# Each result from ChromaDB contains the PDF filename and the pages where the text is found
|
||||
data = [
|
||||
{
|
||||
"user_input": query,
|
||||
"pdf_filename": result["metadatas"][0]["pdf_filename"],
|
||||
"pages": result["metadatas"][0].get("pages"),
|
||||
}
|
||||
for result in results
|
||||
]
|
||||
|
||||
# Initialize the Highlighter
|
||||
highlighter = Highlighter(
|
||||
model="llama3.1",
|
||||
comment=True, # Enable comments to understand the context
|
||||
)
|
||||
|
||||
|
||||
# Define the main asynchronous function to highlight the PDFs
|
||||
async def highlight_pdf():
|
||||
# Use the highlight method to highlight the relevant sentences in the PDFs
|
||||
highlighted_pdf_buffer = await highlighter.highlight(
|
||||
data=data, zero_indexed_pages=True # Pages are zero-based (e.g., 0, 1, 2, ...)
|
||||
)
|
||||
|
||||
# Save the highlighted PDF to a new file
|
||||
with open("highlighted_combined_documents.pdf", "wb") as f:
|
||||
f.write(highlighted_pdf_buffer.getbuffer())
|
||||
|
||||
|
||||
# Run the main function using asyncio
|
||||
asyncio.run(highlight_pdf())
|
35
examples/single_pdf.py
Normal file
35
examples/single_pdf.py
Normal file
@ -0,0 +1,35 @@
|
||||
import asyncio
|
||||
import io
|
||||
from highlight_pdf import Highlighter
|
||||
|
||||
# User input/question
|
||||
user_input = "What are the main findings?"
|
||||
|
||||
# Answer received from LLM based on text in a PDF
|
||||
llm_answer = "The main findings are that the treatment was effective in 70% of cases."
|
||||
|
||||
# PDF filename
|
||||
pdf_filename = "example_pdf_document.pdf"
|
||||
|
||||
# Pages to consider (optional, can be None)
|
||||
pages = [1, 2]
|
||||
|
||||
# Initialize the Highlighter
|
||||
highlighter = Highlighter(
|
||||
model='llama3.1',
|
||||
comment=True # Enable comments to understand the context
|
||||
)
|
||||
|
||||
# Define the main asynchronous function to highlight the PDF
|
||||
async def main():
|
||||
highlighted_pdf_buffer = await highlighter.highlight(
|
||||
user_input=user_input,
|
||||
data=[{"text": llm_answer, "pdf_filename": pdf_filename, "pages": pages}]
|
||||
)
|
||||
|
||||
# Save the highlighted PDF to a new file
|
||||
with open("highlighted_example_pdf_document.pdf", "wb") as f:
|
||||
f.write(highlighted_pdf_buffer.getbuffer())
|
||||
|
||||
# Run the main function using asyncio
|
||||
asyncio.run(main())
|
Loading…
x
Reference in New Issue
Block a user