You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

191 lines
7.6 KiB

import requests
import json
import time
from _arango import ArangoDB # Import ArangoDB client to fetch results
def test_summarize_document():
    """
    Submit a sample plain-text document to the LLM server for summarization.

    Builds a document that has text but an empty chunk list and POSTs it to the
    ``/summarise_document`` endpoint. This function does not wait for or fetch
    the summary; it only returns the identifiers a caller needs to look the
    result up afterwards.

    Returns:
        dict or None: ``{"db_name": ..., "doc_id": ...}`` when the server
        answers with HTTP 200; ``None`` when the request cannot be completed
        or the server answers with any other status code.
    """
    print("Testing document summarization...")
    # Define server endpoint
    url = "http://localhost:8100/summarise_document"
    # requests applies no timeout unless one is given, so without this an
    # unresponsive server would block the script indefinitely.
    request_timeout = 30  # seconds
    # Create a sample document
    sample_document = {
        "arango_doc": {
            # The literal is kept flush-left so the payload text carries no
            # indentation.
            "text": """
The Impact of Climate Change on Coral Reefs
Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable.
Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae,
leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature
can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption
makes it difficult for corals to build their calcium carbonate skeletons.
Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90%
of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt
to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts
for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may
provide hope for reef preservation.
""",
            "chunks": []
        },
        "arango_db_name": "test_db",
        "arango_id": "articles/test_article",
        "is_sci": True
    }
    # Send request to server
    print("Sending document to server for summarization...")
    try:
        response = requests.post(url, json=sample_document, timeout=request_timeout)
    except requests.exceptions.RequestException as e:
        # Treat transport failures (refused connection, timeout, ...) like any
        # other failed submission: report and return None.
        print(f"Error: {e}")
        return None
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None
    print("Request accepted. Response:", response.json())
    # Save values for checking results later
    return {
        "db_name": sample_document["arango_db_name"],
        "doc_id": sample_document["arango_id"]
    }
def test_summarize_chunks():
    """
    Submit a sample pre-chunked document to the LLM server for summarization.

    Builds a document whose ``text`` is empty and whose ``chunks`` list holds
    two chunks (each with its text and page numbers), then POSTs it to the
    ``/summarise_document`` endpoint. This function does not wait for or fetch
    the summaries; it only returns the identifiers a caller needs to look the
    result up afterwards.

    Returns:
        dict or None: ``{"db_name": ..., "doc_id": ...}`` when the server
        answers with HTTP 200; ``None`` when the request cannot be completed
        or the server answers with any other status code.
    """
    print("\nTesting chunk summarization example...")
    # Sample document with chunks
    sample_document_with_chunks = {
        "arango_doc": {
            "text": "",
            "chunks": [
                {
                    "text": "Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. Rising sea temperatures have led to increased coral bleaching events.",
                    "pages": [1]
                },
                {
                    "text": "Studies by Smith et al. [1] show that even a 1-2°C increase in water temperature can trigger mass bleaching events. Additionally, ocean acidification makes it difficult for corals to build their calcium carbonate skeletons.",
                    "pages": [1, 2]
                }
            ]
        },
        "arango_db_name": "test_db",
        "arango_id": "interviews/test_interview",
        "is_sci": False
    }
    url = "http://localhost:8100/summarise_document"
    # requests applies no timeout unless one is given, so without this an
    # unresponsive server would block the script indefinitely.
    request_timeout = 30  # seconds
    print("Sending document with chunks for summarization...")
    try:
        response = requests.post(
            url, json=sample_document_with_chunks, timeout=request_timeout
        )
    except requests.exceptions.RequestException as e:
        # Treat transport failures (refused connection, timeout, ...) like any
        # other failed submission: report and return None.
        print(f"Error: {e}")
        return None
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None
    print("Request accepted. Response:", response.json())
    # Identifiers needed to look the stored result up later
    return {
        "db_name": sample_document_with_chunks["arango_db_name"],
        "doc_id": sample_document_with_chunks["arango_id"]
    }
def poll_for_results(doc_info, max_retries=10, wait_time=5):
    """
    Poll the ArangoDB database to check if the document has been summarized.

    The document is fetched up to ``max_retries`` times. As soon as a fetched
    document contains a ``"summary"`` key, its summary (and the first chunk's
    summary and tags, when present) is printed and the document is returned.

    Args:
        doc_info (dict): Dictionary containing db_name and doc_id. A falsy
            value (e.g. ``None`` from a failed submission) short-circuits to
            ``None`` without touching the database.
        max_retries (int): Maximum number of polling attempts
        wait_time (int): Time to wait between polling attempts (seconds)
    Returns:
        dict or None: The document with summaries if available, None otherwise
    """
    if not doc_info:
        return None
    db_name = doc_info["db_name"]
    doc_id = doc_info["doc_id"]
    print(f"\nPolling for results in {db_name}/{doc_id}...")
    arango = ArangoDB(db_name=db_name)
    separator = "-" * 50
    for attempt in range(1, max_retries + 1):
        print(f"Attempt {attempt}/{max_retries}...")
        try:
            # Get the document from ArangoDB
            document = arango.get_document(doc_id)
            # Check if the document has been summarized
            if document and "summary" in document:
                print("✓ Document summary found!")
                print(separator)
                print("Document Summary:")
                print(separator)
                print(document["summary"]["text_sum"])
                print(separator)
                # Check if chunks have been summarized
                chunks = document["chunks"] if "chunks" in document else None
                if chunks and "summary" in chunks[0]:
                    first_chunk = chunks[0]
                    print("✓ Chunk summaries found!")
                    print(separator)
                    print("First Chunk Summary:")
                    print(separator)
                    print(first_chunk["summary"])
                    print(separator)
                    # Only print tags that exist. A missing "tags" key used
                    # to raise KeyError here, which the handler below turned
                    # into another retry, so a summary that had already been
                    # found was never returned.
                    if "tags" in first_chunk:
                        print("Tags:", first_chunk["tags"])
                return document
        except Exception as e:
            # Best-effort polling: report the problem and try again.
            print(f"Error checking document: {e}")
        # Wait before the next attempt; sleeping after the final one would
        # only delay the failure message.
        if attempt < max_retries:
            time.sleep(wait_time)
    print("❌ Summarization not completed after maximum retries.")
    return None
# Script entry point: check that the server is reachable, submit both sample
# documents, then poll the database for their summaries.
if __name__ == "__main__":
    print("LLM Server Test Script")
    print("=====================\n")
    # Test if server is running
    try:
        # Bounded wait so a server that accepts the connection but never
        # answers cannot hang the script.
        requests.get("http://localhost:8100", timeout=10)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        print("ERROR: Cannot connect to server at http://localhost:8100")
        print("Make sure the server is running before continuing.\n")
        # The bare exit() helper is injected by the site module and is not
        # guaranteed to exist; raising SystemExit always works.
        raise SystemExit(1)
    print("Server is running at http://localhost:8100\n")
    # Run tests and store document info for polling
    doc1_info = test_summarize_document()
    time.sleep(2)  # Brief pause between tests
    doc2_info = test_summarize_chunks()
    print("\nWaiting for background tasks to complete...")
    print("This may take some time depending on LLM response speed.")
    # Poll for results (with longer wait time for the first document which needs to be chunked)
    poll_for_results(doc1_info, max_retries=20, wait_time=6)
    poll_for_results(doc2_info, max_retries=12, wait_time=5)
    print("\nTest script completed.")
    print("If you didn't see results, the background tasks might still be processing.")
    print("You can run this script again later to check, or query the database directly.")