You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
191 lines
7.6 KiB
191 lines
7.6 KiB
import requests |
|
import json |
|
import time |
|
from _arango import ArangoDB # Import ArangoDB client to fetch results |
|
|
|
def test_summarize_document():
    """
    Submit a raw-text sample document to the LLM server's summarise_document endpoint.

    Builds a sample article (flagged is_sci=True, with an empty chunk list) and
    POSTs it as JSON to the server. This function does not poll for the outcome
    itself; it returns the identifiers needed to look the document up later,
    e.g. with poll_for_results().

    Returns:
        dict or None: {"db_name": ..., "doc_id": ...} when the server answers
        with HTTP 200; None when the request cannot be completed (connection
        error, timeout) or the server answers with any other status.
    """
    print("Testing document summarization...")

    # Define server endpoint
    url = "http://localhost:8100/summarise_document"

    # Create a sample document
    sample_document = {
        "arango_doc": {
            "text": """
            The Impact of Climate Change on Coral Reefs

            Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable.
            Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae,
            leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature
            can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption
            makes it difficult for corals to build their calcium carbonate skeletons.

            Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90%
            of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt
            to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts
            for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may
            provide hope for reef preservation.
            """,
            "chunks": []
        },
        "arango_db_name": "test_db",
        "arango_id": "articles/test_article",
        "is_sci": True
    }

    # Send request to server
    print("Sending document to server for summarization...")
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(url, json=sample_document, timeout=30)
    except requests.exceptions.RequestException as e:
        # Report transport failures the same way as HTTP errors: print and return None.
        print(f"Error: request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    # A 200 response is not guaranteed to carry a JSON body; fall back to the raw text.
    try:
        payload = response.json()
    except ValueError:
        payload = response.text
    print("Request accepted. Response:", payload)

    # Save values for checking results later
    return {
        "db_name": sample_document["arango_db_name"],
        "doc_id": sample_document["arango_id"]
    }
|
|
|
def test_summarize_chunks():
    """
    Submit a pre-chunked sample document to the LLM server's summarise_document endpoint.

    The payload has an empty top-level text and two chunks, each with its own
    text and page list, and is flagged is_sci=False. It is POSTed as JSON to
    the same endpoint used for unchunked documents.

    In a real application, you'd typically query the results from the database
    after processing; this function only returns the identifiers needed to do so.

    Returns:
        dict or None: {"db_name": ..., "doc_id": ...} when the server answers
        with HTTP 200; None when the request cannot be completed (connection
        error, timeout) or the server answers with any other status.
    """
    print("\nTesting chunk summarization example...")

    # Sample document with chunks
    sample_document_with_chunks = {
        "arango_doc": {
            "text": "",
            "chunks": [
                {
                    "text": "Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable. Rising sea temperatures have led to increased coral bleaching events.",
                    "pages": [1]
                },
                {
                    "text": "Studies by Smith et al. [1] show that even a 1-2°C increase in water temperature can trigger mass bleaching events. Additionally, ocean acidification makes it difficult for corals to build their calcium carbonate skeletons.",
                    "pages": [1, 2]
                }
            ]
        },
        "arango_db_name": "test_db",
        "arango_id": "interviews/test_interview",
        "is_sci": False
    }

    url = "http://localhost:8100/summarise_document"
    print("Sending document with chunks for summarization...")
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(url, json=sample_document_with_chunks, timeout=30)
    except requests.exceptions.RequestException as e:
        # Report transport failures the same way as HTTP errors: print and return None.
        print(f"Error: request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    # A 200 response is not guaranteed to carry a JSON body; fall back to the raw text.
    try:
        payload = response.json()
    except ValueError:
        payload = response.text
    print("Request accepted. Response:", payload)

    return {
        "db_name": sample_document_with_chunks["arango_db_name"],
        "doc_id": sample_document_with_chunks["arango_id"]
    }
|
|
|
def poll_for_results(doc_info, max_retries=10, wait_time=5):
    """
    Poll the ArangoDB database to check if the document has been summarized.

    The document is fetched up to max_retries times. As soon as a fetched
    document contains a "summary" key, the summary (and, when present, the
    first chunk's summary) is printed and the document is returned.

    Args:
        doc_info (dict): Dictionary containing db_name and doc_id. A falsy
            value (None, empty dict) returns None without touching the database.
        max_retries (int): Maximum number of polling attempts
        wait_time (int): Time to wait between polling attempts (seconds)

    Returns:
        dict or None: The document with summaries if available, None otherwise
    """
    if not doc_info:
        return None

    db_name = doc_info["db_name"]
    doc_id = doc_info["doc_id"]

    print(f"\nPolling for results in {db_name}/{doc_id}...")

    arango = ArangoDB(db_name=db_name)

    for attempt in range(1, max_retries + 1):
        print(f"Attempt {attempt}/{max_retries}...")

        document = None
        try:
            # Only the fetch is guarded, so a problem while printing a found
            # summary can no longer be mistaken for "not summarized yet".
            document = arango.get_document(doc_id)
        except Exception as e:
            # The client's exception types are not visible from this file, so
            # catch broadly at this boundary, report, and retry.
            print(f"Error checking document: {e}")

        # Check if the document has been summarized
        if document and "summary" in document:
            print("✓ Document summary found!")
            print("-" * 50)
            print("Document Summary:")
            print("-" * 50)
            summary = document["summary"]
            # Prefer the "text_sum" field; fall back to the whole summary value
            # rather than failing when the field is absent.
            if isinstance(summary, dict):
                print(summary.get("text_sum", summary))
            else:
                print(summary)
            print("-" * 50)

            # Check if chunks have been summarized
            chunks = document["chunks"] if "chunks" in document else None
            first_chunk = chunks[0] if chunks else None
            if isinstance(first_chunk, dict) and "summary" in first_chunk:
                print("✓ Chunk summaries found!")
                print("-" * 50)
                print("First Chunk Summary:")
                print("-" * 50)
                print(first_chunk["summary"])
                print("-" * 50)
                # NOTE(review): the multi-chunk condition is kept from the
                # original; it is unclear why tags depend on the chunk count.
                if len(chunks) > 1 and "tags" in first_chunk:
                    print("Tags:", first_chunk["tags"])

            return document

        # If we haven't found summaries yet, wait and try again; sleeping
        # after the final attempt would only delay the failure report.
        if attempt < max_retries:
            time.sleep(wait_time)

    print("❌ Summarization not completed after maximum retries.")
    return None
|
|
|
def main():
    """
    Run the LLM server smoke test end to end.

    Checks that the server answers at its base URL, submits both sample
    documents, then polls the database for each result. Exits with status 1
    if the server cannot be reached.
    """
    base_url = "http://localhost:8100"

    print("LLM Server Test Script")
    print("=====================\n")

    # Test if server is running
    try:
        # Bounded wait: an unresponsive server should fail fast, not hang.
        requests.get(base_url, timeout=10)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        print(f"ERROR: Cannot connect to server at {base_url}")
        print("Make sure the server is running before continuing.\n")
        # SystemExit works even where the site-provided exit() helper is absent.
        raise SystemExit(1)
    print(f"Server is running at {base_url}\n")

    # Run tests and store document info for polling
    doc1_info = test_summarize_document()
    time.sleep(2)  # Brief pause between tests
    doc2_info = test_summarize_chunks()

    print("\nWaiting for background tasks to complete...")
    print("This may take some time depending on LLM response speed.")

    # Poll for results (with longer wait time for the first document which needs to be chunked)
    poll_for_results(doc1_info, max_retries=20, wait_time=6)
    poll_for_results(doc2_info, max_retries=12, wait_time=5)

    print("\nTest script completed.")
    print("If you didn't see results, the background tasks might still be processing.")
    print("You can run this script again later to check, or query the database directly.")


if __name__ == "__main__":
    main()