Refactor search response structure and add get_talk endpoint

- Updated SearchResponse to return results with 'id' instead of '_id'. - Introduced new endpoint /api/talk/{talk_id} to fetch individual talk documents. - Enhanced TalkHit schema to use 'id' as the field name with an alias for '_id'. - Cleaned up debug print statements in search function for clarity.
4 months ago · 665abd4dfb
parent e039a496e4
commit 665abd4dfb
3 changed files with 121 additions and 22 deletions
--- a/backend/app.py
+++ b/backend/app.py
@ -6,6 +6,7 @@ from datetime import datetime
 import httpx
 from fastapi import Depends, FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from arango_client import arango
 from info import debate_types, explainer, limit_warning, party_colors
 from .schemas import (
@ -50,36 +51,28 @@ def meta():
@app.post("/api/search", response_model=SearchResponse)
 def search(payload: SearchRequest):
    print('PAYLOAD IN APP', payload)
    results, stats, limit_reached = search_service.search(
        payload, include_snippets=payload.include_snippets
    )
-    
+
    print(f'Search service returned {len(results)} results')
    # Log first result to see structure
    if results:
        print('First result structure:', results[0].keys() if isinstance(results[0], dict) else 'not a dict')
        print('First result _id:', results[0].get('_id') if isinstance(results[0], dict) else 'N/A')
    # Try to convert results to TalkHit objects
    hits = []
    for idx, hit in enumerate(results):
        try:
            talk_hit = TalkHit(**hit)
-            hits.append(talk_hit)
+            # Serialize using alias so 'id' is sent to frontend, not '_id'
            hit_dict = talk_hit.dict(by_alias=True)
            hits.append(hit_dict)
        except Exception as e:
-            print(f'Error converting result {idx} to TalkHit: {e}')
+            print(f"Error converting result {idx} to TalkHit: {e}")
-            print(f'Problematic result: {hit}')
+            print(f"Problematic result: {hit}")
            # Continue with other results instead of failing completely
            continue
-    
+
-    print(f'Successfully converted {len(hits)} results to TalkHit objects')
+    return {
-    
+        "results": hits,
-    return SearchResponse(
+        "stats": stats,
-        results=hits,
+        "active_filters": {
        stats=stats,
        active_filters={
            "parties": payload.parties,
            "people": payload.people,
            "debates": payload.debates,
@ -88,8 +81,8 @@ def search(payload: SearchRequest):
            "speaker_ids": payload.speaker_ids,
            "speaker": payload.speaker,
        },
-        limit_reached=limit_reached,
+        "limit_reached": limit_reached,
-    )
+    }
@app.post("/api/chat", response_model=ChatResponse)
@ -119,3 +112,106 @@ def chat(payload: ChatRequest) -> ChatResponse:
    return ChatResponse(answer=chat_result["answer"], sources=chat_result["sources"])
@app.get("/api/talk/{talk_id}")
 async def get_talk(talk_id: str) -> dict:
    """
    Fetch a single talk document by its ID from the 'talks' collection.
    This endpoint accepts either:
    - A full _id like "talks/H40911"
    - Just the _key like "H40911" (will be prefixed with "talks/")
    The document is joined with the corresponding person from the 'people' collection
    using the intressent_id field.
    The response also includes lightweight navigation data (previous/next speeches)
    for the same debate when ordering information is available.
    Args:
        talk_id (str): The talk ID (either full _id or just _key)
    Returns:
        dict: The talk document with person information merged in
    Raises:
        HTTPException: 404 if talk not found
    """
    # If the ID doesn't contain a slash, assume it's just the _key and prefix with collection
    if "/" not in talk_id:
        full_id = f"talks/{talk_id}"
    else:
        full_id = talk_id
    # AQL query to fetch the talk and join with person data
    query = """
    LET doc_full = DOCUMENT(@talk_id)
    FILTER doc_full != null
    /* Only keep the relevant fields from the talk */
    LET doc = KEEP(
    doc_full,
    [
        "anforandetext",
        "talare",
        "parti",
        "datum",
        "kammaraktivitet",
        "avsnittsrubrik",
        "titel",
        "anforande_nummer",
        "replik",
        "url_session",
        "url_audio"
    ]
    )
    /* Fetch person only if intressent_id exists */
    LET person_full = doc_full.intressent_id
    ? DOCUMENT(CONCAT("people/", doc_full.intressent_id))
    : null
    LET person = person_full
    ? KEEP(person_full, ["bild_url_192", "tilltalsnamn", "efternamn", "valkrets", "status"])
    : null
    /* Interpret anforande_nummer as number */
    LET num = IS_NUMBER(TO_NUMBER(doc.anforande_nummer)) ? TO_NUMBER(doc.anforande_nummer) : null
    LET previous = num != null
    ? FIRST(
        FOR t IN talks
            FILTER t.datum == doc.datum
            AND t.kammaraktivitet == doc.kammaraktivitet
            AND IS_NUMBER(TO_NUMBER(t.anforande_nummer))
            AND TO_NUMBER(t.anforande_nummer) == num - 1
            RETURN t._id
        )
    : null
    LET next = num != null
    ? FIRST(
        FOR t IN talks
            FILTER t.datum == doc.datum
            AND t.kammaraktivitet == doc.kammaraktivitet
            AND IS_NUMBER(TO_NUMBER(t.anforande_nummer))
            AND TO_NUMBER(t.anforande_nummer) == num + 1
            RETURN t._id
        )
    : null
    RETURN MERGE(doc, {
    person: person,
    navigation: {
        previous: previous,
        next: next
    }
    })
    """
    results = arango.execute_aql(query, bind_vars={"talk_id": full_id})
    if not results or results[0] is None:
        raise HTTPException(status_code=404, detail=f"Talk not found: {talk_id}")
    return results[0]
--- a/backend/schemas.py
+++ b/backend/schemas.py
@ -33,7 +33,7 @@ class SearchRequest(SearchFilters):
 class TalkHit(BaseModel):
-    _id: str  # Changed from expecting specific format - just accept any string
+    id: str = Field(..., alias="_id")  # Use 'id' as field name, alias to '_id'
    text: str
    snippet: Optional[str] = None  # Add default to make validation more forgiving
    snippet_long: Optional[str] = None
@ -51,6 +51,8 @@ class TalkHit(BaseModel):
    class Config:
        # Allow extra fields from the database that we don't explicitly define
        extra = "ignore"
        validate_by_name = True
        allow_population_by_alias = True
 class AggregatedStats(BaseModel):
--- a/backend/services/search.py
+++ b/backend/services/search.py
@ -649,6 +649,7 @@ class SearchService:
            results.append(
                {
                    "_id": doc.get("_id"),
                    # "id": doc.get("_id"),  # Optional: add for debugging
                    "text": text,
                    "snippet": snippet,
                    "snippet_long": snippet_long,