import json
import time

import requests

from _arango import ArangoDB  # Import ArangoDB client to fetch results

# Base address of the LLM server under test and the endpoint this script exercises.
SERVER_URL = "http://localhost:8100"
SUMMARISE_URL = f"{SERVER_URL}/summarise_document"

# Seconds to wait for an HTTP response. Without a timeout `requests` blocks
# forever if the server accepts the connection but never answers.
REQUEST_TIMEOUT = 30


def _submit_for_summary(payload):
    """
    POST a summarization payload to the server and report the outcome.

    Args:
        payload (dict): Request body; must contain "arango_db_name" and "arango_id"

    Returns:
        dict or None: {"db_name": ..., "doc_id": ...} for later polling when the
        server answers with HTTP 200, None on any transport or HTTP error
    """
    try:
        response = requests.post(SUMMARISE_URL, json=payload, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as exc:
        print(f"Error: request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    # A 200 with a non-JSON body should not abort the script; show the raw text.
    try:
        body = response.json()
    except ValueError:
        body = response.text
    print("Request accepted. Response:", body)

    # Save values for checking results later
    return {
        "db_name": payload["arango_db_name"],
        "doc_id": payload["arango_id"],
    }


def test_summarize_document():
    """
    Test the document summarization functionality of the LLM server by sending
    a POST request to the /summarise_document endpoint.

    The sample document carries raw text and an empty chunk list.

    Returns:
        dict or None: db_name/doc_id of the submitted document for polling,
        or None if the request failed
    """
    print("Testing document summarization...")

    # Create a sample document
    sample_document = {
        "arango_doc": {
            "text": """
            The Impact of Climate Change on Coral Reefs

            Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable.
            Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae,
            leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature
            can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption
            makes it difficult for corals to build their calcium carbonate skeletons.

            Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90%
            of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt
            to higher temperatures through a process called adaptive bleaching, where they exchange their
            algal symbionts for more heat-tolerant varieties.

            Conservation efforts focused on cultivating these resilient species may provide hope for reef preservation.
            """,
            "chunks": [],
        },
        "arango_db_name": "test_db",
        "arango_id": "articles/test_article",
        "is_sci": True,
    }

    # Send request to server
    print("Sending document to server for summarization...")
    return _submit_for_summary(sample_document)


def test_summarize_chunks():
    """
    Test the chunk summarization functionality directly by creating a sample
    document with chunks.

    In a real application, you'd typically query the results from the database
    after processing.

    Returns:
        dict or None: db_name/doc_id of the submitted document for polling,
        or None if the request failed
    """
    print("\nTesting chunk summarization example...")

    # Sample document with chunks
    sample_document_with_chunks = {
        "arango_doc": {
            "text": "",
            "chunks": [
                {
                    "text": (
                        "Climate change has significantly affected marine ecosystems worldwide, "
                        "with coral reefs being among the most vulnerable. "
                        "Rising sea temperatures have led to increased coral bleaching events."
                    ),
                    "pages": [1],
                },
                {
                    "text": (
                        "Studies by Smith et al. [1] show that even a 1-2°C increase in water "
                        "temperature can trigger mass bleaching events. "
                        "Additionally, ocean acidification makes it difficult for corals to "
                        "build their calcium carbonate skeletons."
                    ),
                    "pages": [1, 2],
                },
            ],
        },
        "arango_db_name": "test_db",
        "arango_id": "interviews/test_interview",
        "is_sci": False,
    }

    print("Sending document with chunks for summarization...")
    return _submit_for_summary(sample_document_with_chunks)


def _print_summaries(document):
    """Print the document summary and, when present, the first chunk's summary and tags."""
    rule = "-" * 50

    summary = document["summary"]
    # Fall back to the raw summary value if it lacks the expected "text_sum" key,
    # so an unexpected shape is shown rather than raising.
    text = summary.get("text_sum", summary) if isinstance(summary, dict) else summary

    print("✓ Document summary found!")
    print(rule)
    print("Document Summary:")
    print(rule)
    print(text)
    print(rule)

    # Check if chunks have been summarized
    if "chunks" in document and document["chunks"]:
        first_chunk = document["chunks"][0]
        if "summary" in first_chunk:
            print("✓ Chunk summaries found!")
            print(rule)
            print("First Chunk Summary:")
            print(rule)
            print(first_chunk["summary"])
            print(rule)
            # Guard on the key that is actually read. The previous guard looked
            # at the number of chunks, so a first chunk without tags raised
            # KeyError and made the poll loop treat a finished document as failed.
            if "tags" in first_chunk:
                print("Tags:", first_chunk["tags"])


def poll_for_results(doc_info, max_retries=10, wait_time=5):
    """
    Poll the ArangoDB database to check if the document has been summarized.

    Args:
        doc_info (dict): Dictionary containing db_name and doc_id
        max_retries (int): Maximum number of polling attempts
        wait_time (int): Time to wait between polling attempts (seconds)

    Returns:
        dict or None: The document with summaries if available, None otherwise
    """
    if not doc_info:
        return None

    db_name = doc_info["db_name"]
    doc_id = doc_info["doc_id"]

    print(f"\nPolling for results in {db_name}/{doc_id}...")

    arango = ArangoDB(db_name=db_name)

    for attempt in range(1, max_retries + 1):
        print(f"Attempt {attempt}/{max_retries}...")

        # Only the database call is retried on failure. The client's exception
        # types are not visible from this file, hence the broad catch.
        try:
            document = arango.get_document(doc_id)
        except Exception as e:
            print(f"Error checking document: {e}")
            document = None

        # Check if the document has been summarized
        if document and "summary" in document:
            _print_summaries(document)
            return document

        # If we haven't found summaries yet, wait and try again; there is
        # nothing to wait for after the final attempt.
        if attempt < max_retries:
            time.sleep(wait_time)

    print("❌ Summarization not completed after maximum retries.")
    return None


if __name__ == "__main__":
    print("LLM Server Test Script")
    print("=====================\n")

    # Test if server is running
    try:
        requests.get(SERVER_URL, timeout=REQUEST_TIMEOUT)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        print(f"ERROR: Cannot connect to server at {SERVER_URL}")
        print("Make sure the server is running before continuing.\n")
        # SystemExit works without the `site`-provided `exit` helper.
        raise SystemExit(1) from None
    print(f"Server is running at {SERVER_URL}\n")

    # Run tests and store document info for polling
    doc1_info = test_summarize_document()
    time.sleep(2)  # Brief pause between tests
    doc2_info = test_summarize_chunks()

    print("\nWaiting for background tasks to complete...")
    print("This may take some time depending on LLM response speed.")

    # Poll for results (with longer wait time for the first document which needs to be chunked)
    poll_for_results(doc1_info, max_retries=20, wait_time=6)
    poll_for_results(doc2_info, max_retries=12, wait_time=5)

    print("\nTest script completed.")
    print("If you didn't see results, the background tasks might still be processing.")
    print("You can run this script again later to check, or query the database directly.")