diff --git a/backend/app.py b/backend/app.py
index cc2722a..9d8e96e 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -6,6 +6,7 @@ from datetime import datetime
 import httpx
 from fastapi import Depends, FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from arango_client import arango
 from info import debate_types, explainer, limit_warning, party_colors
 
 from .schemas import (
@@ -50,36 +51,28 @@ def meta():
 
 @app.post("/api/search", response_model=SearchResponse)
 def search(payload: SearchRequest):
-    print('PAYLOAD IN APP', payload)
     results, stats, limit_reached = search_service.search(
         payload, include_snippets=payload.include_snippets
     )
-
-    print(f'Search service returned {len(results)} results')
-
-    # Log first result to see structure
-    if results:
-        print('First result structure:', results[0].keys() if isinstance(results[0], dict) else 'not a dict')
-        print('First result _id:', results[0].get('_id') if isinstance(results[0], dict) else 'N/A')
-    
+
     # Try to convert results to TalkHit objects
     hits = []
     for idx, hit in enumerate(results):
         try:
             talk_hit = TalkHit(**hit)
-            hits.append(talk_hit)
+            # by_alias=True emits the key under its alias "_id"; the Python field name is "id"
+            hit_dict = talk_hit.dict(by_alias=True)
+            hits.append(hit_dict)
         except Exception as e:
-            print(f'Error converting result {idx} to TalkHit: {e}')
-            print(f'Problematic result: {hit}')
+            print(f"Error converting result {idx} to TalkHit: {e}")
+            print(f"Problematic result: {hit}")
             # Continue with other results instead of failing completely
             continue
-    
-    print(f'Successfully converted {len(hits)} results to TalkHit objects')
-    
-    return SearchResponse(
-        results=hits,
-        stats=stats,
-        active_filters={
+
+    return {
+        "results": hits,
+        "stats": stats,
+        "active_filters": {
             "parties": payload.parties,
             "people": payload.people,
             "debates": payload.debates,
@@ -88,8 +81,8 @@ def search(payload: SearchRequest):
             "speaker_ids": payload.speaker_ids,
             "speaker": payload.speaker,
         },
-        limit_reached=limit_reached,
-    )
+        "limit_reached": limit_reached,
+    }
 
 
 @app.post("/api/chat", response_model=ChatResponse)
@@ -119,3 +112,118 @@ def chat(payload: ChatRequest) -> ChatResponse:
 
     return ChatResponse(answer=chat_result["answer"], sources=chat_result["sources"])
 
+@app.get("/api/talk/{talk_id:path}")
+def get_talk(talk_id: str) -> dict:
+    """
+    Fetch a single talk document by its ID from the 'talks' collection.
+
+    This endpoint accepts either:
+    - A full _id like "talks/H40911"
+    - Just the _key like "H40911" (will be prefixed with "talks/")
+
+    The route uses the ``path`` converter because a full _id contains a slash,
+    which a plain path parameter never matches.
+
+    Declared as a plain ``def``: ``arango.execute_aql`` is called synchronously,
+    so FastAPI should run this endpoint in its threadpool rather than on the
+    event loop.
+
+    The document is joined with the corresponding person from the 'people' collection
+    using the intressent_id field.
+
+    The response also includes lightweight navigation data (previous/next speeches)
+    for the same debate when ordering information is available.
+
+    Args:
+        talk_id (str): The talk ID (either full _id or just _key)
+
+    Returns:
+        dict: The talk document with person information merged in
+
+    Raises:
+        HTTPException: 404 if talk not found or the ID is not a 'talks' handle
+    """
+    # If the ID doesn't contain a slash, assume it's just the _key and prefix with collection
+    full_id = talk_id if "/" in talk_id else f"talks/{talk_id}"
+
+    # DOCUMENT() resolves handles from any collection, so only "talks/<key>"
+    # may reach the query; everything else is reported as not found.
+    collection, _, key = full_id.partition("/")
+    if collection != "talks" or not key or "/" in key:
+        raise HTTPException(status_code=404, detail=f"Talk not found: {talk_id}")
+
+    # AQL query to fetch the talk and join with person data
+    query = """
+    LET doc_full = DOCUMENT(@talk_id)
+    FILTER doc_full != null
+
+    /* Only keep the relevant fields from the talk */
+    LET doc = KEEP(
+        doc_full,
+        [
+            "anforandetext",
+            "talare",
+            "parti",
+            "datum",
+            "kammaraktivitet",
+            "avsnittsrubrik",
+            "titel",
+            "anforande_nummer",
+            "replik",
+            "url_session",
+            "url_audio"
+        ]
+    )
+
+    /* Fetch person only if intressent_id exists */
+    LET person_full = doc_full.intressent_id
+        ? DOCUMENT(CONCAT("people/", doc_full.intressent_id))
+        : null
+
+    LET person = person_full
+        ? KEEP(person_full, ["bild_url_192", "tilltalsnamn", "efternamn", "valkrets", "status"])
+        : null
+
+    /* Interpret anforande_nummer as number */
+    LET num = IS_NUMBER(TO_NUMBER(doc.anforande_nummer)) ? TO_NUMBER(doc.anforande_nummer) : null
+
+    LET previous = num != null
+        ? FIRST(
+            FOR t IN talks
+                FILTER t.datum == doc.datum
+                    AND t.kammaraktivitet == doc.kammaraktivitet
+                    AND IS_NUMBER(TO_NUMBER(t.anforande_nummer))
+                    AND TO_NUMBER(t.anforande_nummer) == num - 1
+                LIMIT 1
+                RETURN t._id
+        )
+        : null
+
+    LET next = num != null
+        ? FIRST(
+            FOR t IN talks
+                FILTER t.datum == doc.datum
+                    AND t.kammaraktivitet == doc.kammaraktivitet
+                    AND IS_NUMBER(TO_NUMBER(t.anforande_nummer))
+                    AND TO_NUMBER(t.anforande_nummer) == num + 1
+                LIMIT 1
+                RETURN t._id
+        )
+        : null
+
+    RETURN MERGE(doc, {
+        person: person,
+        navigation: {
+            previous: previous,
+            next: next
+        }
+    })
+
+    """
+
+    results = arango.execute_aql(query, bind_vars={"talk_id": full_id})
+
+    if not results or results[0] is None:
+        raise HTTPException(status_code=404, detail=f"Talk not found: {talk_id}")
+
+    return results[0]
diff --git a/backend/schemas.py b/backend/schemas.py
index 1be5bde..4d8669b 100644
--- a/backend/schemas.py
+++ b/backend/schemas.py
@@ -33,7 +33,7 @@ class SearchRequest(SearchFilters):
 
 
 class TalkHit(BaseModel):
-    _id: str  # Changed from expecting specific format - just accept any string
+    id: str = Field(..., alias="_id")  # Use 'id' as field name, alias to '_id'
     text: str
     snippet: Optional[str] = None  # Add default to make validation more forgiving
     snippet_long: Optional[str] = None
@@ -51,6 +51,8 @@ class TalkHit(BaseModel):
     class Config:
         # Allow extra fields from the database that we don't explicitly define
         extra = "ignore"
+        validate_by_name = True
+        allow_population_by_alias = True  # NOTE(review): does not look like a recognised pydantic config key (v1: allow_population_by_field_name, v2: populate_by_name) - confirm
 
 
 class AggregatedStats(BaseModel):
diff --git a/backend/services/search.py b/backend/services/search.py
index c22b9cb..7338303 100644
--- a/backend/services/search.py
+++ b/backend/services/search.py
@@ -649,6 +649,7 @@ class SearchService:
             results.append(
                 {
                     "_id": doc.get("_id"),
+                    # "id": doc.get("_id"),  # Optional: add for debugging
                     "text": text,
                     "snippet": snippet,
                     "snippet_long": snippet_long,