# sci/test_and_view.py

#!/usr/bin/env python3
"""
Test LLM Server and View Results

This script sends a test document to the LLM server for summarization,
waits for processing to complete, and displays the results.

Usage:
    python test_and_view.py [--wait SECONDS] [--retries COUNT]

Options:
    --wait SECONDS    Number of seconds to wait between polling attempts (default: 5)
    --retries COUNT   Maximum number of polling attempts (default: 20)
"""

import requests
import json
import time
import os
import argparse
import sys
from _arango import ArangoDB


def send_test_document(url="http://localhost:8100/summarise_document", timeout=30):
    """Send a test document to the LLM server for summarization.

    Args:
        url: Endpoint that accepts the summarization request.
        timeout: Seconds to wait for the server to answer the POST before
            giving up, so an unresponsive server cannot hang the script.

    Returns:
        A dict with "db_name" and "doc_id" keys when the server answers with
        HTTP 200, otherwise None (non-200 status or request failure).
    """
    print("Sending test document to LLM server...")

    # The timestamp suffix gives every run its own document ID
    doc_id = f"test_articles/climate_impact_{int(time.time())}"

    sample_document = {
        "arango_doc": {
            "text": """
            The Impact of Climate Change on Coral Reefs

            Climate change has significantly affected marine ecosystems worldwide, with coral reefs being among the most vulnerable.
            Rising sea temperatures have led to increased coral bleaching events, where corals expel their symbiotic algae,
            leading to whitening and potential death. Studies show that even a 1-2°C increase in water temperature
            can trigger mass bleaching events. Additionally, ocean acidification caused by increased CO2 absorption
            makes it difficult for corals to build their calcium carbonate skeletons.

            Recent research by Johnson et al. (2023) suggests that if current trends continue, we may lose up to 90%
            of coral reefs by 2050. However, some corals have shown remarkable resilience. Certain species can adapt
            to higher temperatures through a process called adaptive bleaching, where they exchange their algal symbionts
            for more heat-tolerant varieties. Conservation efforts focused on cultivating these resilient species may
            provide hope for reef preservation.
            """,
            "chunks": []
        },
        "arango_db_name": "test_db",
        "arango_id": doc_id,
        "is_sci": True
    }

    try:
        # Without a timeout, requests would wait indefinitely for a reply
        response = requests.post(url, json=sample_document, timeout=timeout)
    except requests.RequestException as e:
        print(f"✗ Connection error: {e}")
        return None

    if response.status_code != 200:
        print(f"✗ Error: {response.status_code}")
        print(response.text)
        return None

    print("✓ Request accepted by server")
    print(f"Document ID: {doc_id}")
    return {
        "db_name": "test_db",
        "doc_id": doc_id
    }


def poll_for_results(doc_info, max_retries=20, wait_time=5):
    """Poll the database until the document is summarized.

    Args:
        doc_info: Dict with "db_name" and "doc_id" keys, as returned by
            send_test_document(). A falsy value returns None immediately.
        max_retries: Maximum number of lookups to perform.
        wait_time: Seconds to sleep between two consecutive lookups.

    Returns:
        The document returned by ArangoDB.get_document() once it contains a
        "summary" key, or None if that never happened within max_retries
        attempts.
    """
    if not doc_info:
        return None

    db_name = doc_info["db_name"]
    doc_id = doc_info["doc_id"]

    print(f"\nPolling for results in {db_name}/{doc_id}...")
    print(f"Will check every {wait_time} seconds, up to {max_retries} times.")

    arango = ArangoDB(db_name=db_name)

    for attempt in range(1, max_retries + 1):
        print(f"Attempt {attempt}/{max_retries}... ", end="", flush=True)

        try:
            # Get the document from ArangoDB
            document = arango.get_document(doc_id)

            # Check if the document has been summarized
            if document and "summary" in document:
                print("✓ Document summary found!")
                return document

            if document:
                print("Document exists but no summary yet")
            else:
                # The lookup returned nothing, so do not claim it exists
                print("Document not found yet")
        except Exception as e:
            # Best-effort polling: the exception types raised by the ArangoDB
            # wrapper are not visible here, so report and retry.
            print(f"Error: {e}")

        # Sleeping after the final attempt would only delay the failure report
        if attempt < max_retries:
            time.sleep(wait_time)

    print("\n✗ Summarization not completed after maximum retries.")
    return None


def display_results(document):
    """Display the summarization results.

    Prints the document-level summary, the model metadata when present, and
    the summary, tags and references of every chunk that carries a "summary"
    key. Missing or malformed parts are skipped or replaced by a placeholder
    instead of raising.

    Args:
        document: Mapping holding the summarized document, or a falsy value
            when there is nothing to show.
    """
    if not document:
        print("\nNo results to display")
        return

    no_text = "(no summary text available)"

    print("\n" + "=" * 80)
    print(f"RESULTS FOR DOCUMENT: {document.get('_id', 'Unknown')}")
    print("=" * 80)

    # Document summary
    print("\n📄 DOCUMENT SUMMARY")
    print("-" * 80)
    summary = document.get("summary")
    if isinstance(summary, dict):
        summary_text = summary.get("text_sum", no_text)
        meta = summary.get("meta")
    else:
        # Tolerate a missing summary or one stored as a plain value
        summary_text = summary if summary else no_text
        meta = None
    print(summary_text)

    # Model info if available
    if isinstance(meta, dict):
        model = meta.get("model", "Unknown")
        temp = meta.get("temperature", "Unknown")
        print(f"\nGenerated using: {model} (temperature: {temp})")

    # Check for summarized chunks
    chunks = document.get("chunks") or []
    if chunks:
        summarized_chunks = [
            chunk for chunk in chunks
            if isinstance(chunk, dict) and "summary" in chunk
        ]
        print(f"\n🧩 CHUNK SUMMARIES ({len(summarized_chunks)}/{len(chunks)} chunks processed)")

        for i, chunk in enumerate(summarized_chunks, start=1):
            print("\n" + "-" * 80)
            print(f"Chunk {i}:")
            print("-" * 80)
            print(chunk["summary"])

            # Display tags; str() keeps non-string tags from breaking join()
            tags = chunk.get("tags")
            if tags:
                print("\nTags:", ", ".join(str(tag) for tag in tags))

            # Display references
            references = chunk.get("references")
            if references:
                print("\nReferences:")
                for ref in references:
                    print(f"- {ref}")

    print("\n" + "=" * 80)

    # Provide links to web views
    print("\nView in browser:")
    print("- HTML view: http://localhost:8100/html_results")
    print("- JSON view: http://localhost:8100/view_results")


def check_server_status():
    """Check if the LLM server is running.

    Returns:
        True if the /latest_result endpoint answers within 2 seconds,
        whatever the HTTP status; False if the request fails.
    """
    try:
        requests.get("http://localhost:8100/latest_result", timeout=2)
    except requests.RequestException:
        # Only request failures mean "not running"; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        return False
    return True


def main():
    """Run the end-to-end check: ping the server, submit, poll, display.

    Command-line options:
        --wait: seconds to wait between polling attempts (default 5).
        --retries: maximum number of polling attempts (default 20).

    Exits with status 1 when the server is unreachable, the document cannot
    be submitted, or no summary is found within the allowed attempts.
    """
    parser = argparse.ArgumentParser(description='Test LLM server and view results')
    parser.add_argument('--wait', type=int, default=5, help='Seconds to wait between polling attempts')
    parser.add_argument('--retries', type=int, default=20, help='Maximum number of polling attempts')
    args = parser.parse_args()

    print("LLM Server Test and View")
    print("======================\n")

    # Check if server is running
    if not check_server_status():
        print("ERROR: Cannot connect to LLM server at http://localhost:8100")
        print("Make sure the server is running before continuing.")
        sys.exit(1)

    print("✓ Server is running\n")

    # Send test document
    doc_info = send_test_document()
    if not doc_info:
        print("Failed to send test document")
        sys.exit(1)

    print("\n⏳ Processing document...")
    print("(This may take some time depending on model size and document complexity)")

    # Poll for results
    result = poll_for_results(doc_info, max_retries=args.retries, wait_time=args.wait)

    # Display results
    display_results(result)

    # Report a missing summary through the exit status, like the other
    # failure paths, so callers of the script can detect it.
    if not result:
        sys.exit(1)


if __name__ == "__main__":
    main()