Refactor search response structure and add get_talk endpoint

- Updated SearchResponse to return results with 'id' instead of '_id'.
- Introduced new endpoint /api/talk/{talk_id} to fetch individual talk documents.
- Changed the TalkHit schema to use 'id' as the field name, with '_id' as its alias.
- Cleaned up debug print statements in search function for clarity.
master
Lasse Server 2 months ago
parent e039a496e4
commit 665abd4dfb
  1. 138
      backend/app.py
  2. 4
      backend/schemas.py
  3. 1
      backend/services/search.py

@ -6,6 +6,7 @@ from datetime import datetime
import httpx import httpx
from fastapi import Depends, FastAPI, HTTPException from fastapi import Depends, FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from arango_client import arango
from info import debate_types, explainer, limit_warning, party_colors from info import debate_types, explainer, limit_warning, party_colors
from .schemas import ( from .schemas import (
@ -50,36 +51,28 @@ def meta():
@app.post("/api/search", response_model=SearchResponse) @app.post("/api/search", response_model=SearchResponse)
def search(payload: SearchRequest): def search(payload: SearchRequest):
print('PAYLOAD IN APP', payload)
results, stats, limit_reached = search_service.search( results, stats, limit_reached = search_service.search(
payload, include_snippets=payload.include_snippets payload, include_snippets=payload.include_snippets
) )
print(f'Search service returned {len(results)} results')
# Log first result to see structure
if results:
print('First result structure:', results[0].keys() if isinstance(results[0], dict) else 'not a dict')
print('First result _id:', results[0].get('_id') if isinstance(results[0], dict) else 'N/A')
# Try to convert results to TalkHit objects # Try to convert results to TalkHit objects
hits = [] hits = []
for idx, hit in enumerate(results): for idx, hit in enumerate(results):
try: try:
talk_hit = TalkHit(**hit) talk_hit = TalkHit(**hit)
hits.append(talk_hit) # Serialize using alias so 'id' is sent to frontend, not '_id'
hit_dict = talk_hit.dict(by_alias=True)
hits.append(hit_dict)
except Exception as e: except Exception as e:
print(f'Error converting result {idx} to TalkHit: {e}') print(f"Error converting result {idx} to TalkHit: {e}")
print(f'Problematic result: {hit}') print(f"Problematic result: {hit}")
# Continue with other results instead of failing completely # Continue with other results instead of failing completely
continue continue
print(f'Successfully converted {len(hits)} results to TalkHit objects') return {
"results": hits,
return SearchResponse( "stats": stats,
results=hits, "active_filters": {
stats=stats,
active_filters={
"parties": payload.parties, "parties": payload.parties,
"people": payload.people, "people": payload.people,
"debates": payload.debates, "debates": payload.debates,
@ -88,8 +81,8 @@ def search(payload: SearchRequest):
"speaker_ids": payload.speaker_ids, "speaker_ids": payload.speaker_ids,
"speaker": payload.speaker, "speaker": payload.speaker,
}, },
limit_reached=limit_reached, "limit_reached": limit_reached,
) }
@app.post("/api/chat", response_model=ChatResponse) @app.post("/api/chat", response_model=ChatResponse)
@ -119,3 +112,106 @@ def chat(payload: ChatRequest) -> ChatResponse:
return ChatResponse(answer=chat_result["answer"], sources=chat_result["sources"]) return ChatResponse(answer=chat_result["answer"], sources=chat_result["sources"])
@app.get("/api/talk/{talk_id:path}")
async def get_talk(talk_id: str) -> dict:
    """
    Fetch a single talk document by its ID from the 'talks' collection.

    This endpoint accepts either:
    - A full _id like "talks/H40911"
    - Just the _key like "H40911" (will be prefixed with "talks/")

    The route uses the ``:path`` convertor because a plain ``{talk_id}``
    parameter does not match ``/``, which would make the full ``_id`` form
    unreachable over HTTP.

    The talk is joined with the corresponding person from the 'people'
    collection using the intressent_id field. The response also includes
    lightweight navigation data (previous/next speeches) for the same
    debate when the talk has a numeric anforande_nummer.

    Args:
        talk_id: The talk ID (either full _id or just _key).

    Returns:
        The selected talk fields merged with ``person`` (or null) and
        ``navigation`` ({previous, next}, each a talk _id or null).

    Raises:
        HTTPException: 404 if the ID does not refer to the 'talks'
            collection or no such talk exists.
    """
    collection, separator, key = talk_id.partition("/")
    if separator:
        # Only documents in 'talks' may be served; without this check a
        # caller could make DOCUMENT() read from any other collection.
        if collection != "talks" or not key or "/" in key:
            raise HTTPException(status_code=404, detail=f"Talk not found: {talk_id}")
        full_id = talk_id
    else:
        # A bare _key: prefix it with the collection name.
        full_id = f"talks/{talk_id}"

    # AQL query to fetch the talk and join with person data.
    # TO_NUMBER() yields 0 for null / non-numeric input, so wrapping it in
    # IS_NUMBER() can never detect a missing speech number; the digit check
    # via REGEX_TEST is what actually guards the navigation lookups.
    query = """
        LET doc_full = DOCUMENT(@talk_id)
        FILTER doc_full != null

        /* Only keep the relevant fields from the talk */
        LET doc = KEEP(
            doc_full,
            [
                "anforandetext",
                "talare",
                "parti",
                "datum",
                "kammaraktivitet",
                "avsnittsrubrik",
                "titel",
                "anforande_nummer",
                "replik",
                "url_session",
                "url_audio"
            ]
        )

        /* Fetch person only if intressent_id exists */
        LET person_full = doc_full.intressent_id
            ? DOCUMENT(CONCAT("people/", doc_full.intressent_id))
            : null
        LET person = person_full
            ? KEEP(person_full, ["bild_url_192", "tilltalsnamn", "efternamn", "valkrets", "status"])
            : null

        /* Interpret anforande_nummer as number; null when it is missing or not numeric */
        LET num = REGEX_TEST(TRIM(TO_STRING(doc.anforande_nummer)), "^[0-9]+$")
            ? TO_NUMBER(doc.anforande_nummer)
            : null

        LET previous = num != null
            ? FIRST(
                FOR t IN talks
                    FILTER t.datum == doc.datum
                        AND t.kammaraktivitet == doc.kammaraktivitet
                        AND REGEX_TEST(TRIM(TO_STRING(t.anforande_nummer)), "^[0-9]+$")
                        AND TO_NUMBER(t.anforande_nummer) == num - 1
                    RETURN t._id
            )
            : null

        LET next = num != null
            ? FIRST(
                FOR t IN talks
                    FILTER t.datum == doc.datum
                        AND t.kammaraktivitet == doc.kammaraktivitet
                        AND REGEX_TEST(TRIM(TO_STRING(t.anforande_nummer)), "^[0-9]+$")
                        AND TO_NUMBER(t.anforande_nummer) == num + 1
                    RETURN t._id
            )
            : null

        RETURN MERGE(doc, {
            person: person,
            navigation: {
                previous: previous,
                next: next
            }
        })
    """

    # NOTE(review): execute_aql is called without await inside an async
    # route; if it performs blocking I/O it will stall the event loop.
    # Confirm against arango_client and consider a plain `def` route.
    results = arango.execute_aql(query, bind_vars={"talk_id": full_id})

    # The top-level FILTER makes the query return no rows for an unknown id.
    if not results or results[0] is None:
        raise HTTPException(status_code=404, detail=f"Talk not found: {talk_id}")

    return results[0]

@ -33,7 +33,7 @@ class SearchRequest(SearchFilters):
class TalkHit(BaseModel): class TalkHit(BaseModel):
_id: str # Changed from expecting specific format - just accept any string id: str = Field(..., alias="_id") # Use 'id' as field name, alias to '_id'
text: str text: str
snippet: Optional[str] = None # Add default to make validation more forgiving snippet: Optional[str] = None # Add default to make validation more forgiving
snippet_long: Optional[str] = None snippet_long: Optional[str] = None
@ -51,6 +51,8 @@ class TalkHit(BaseModel):
class Config: class Config:
# Allow extra fields from the database that we don't explicitly define # Allow extra fields from the database that we don't explicitly define
extra = "ignore" extra = "ignore"
validate_by_name = True
allow_population_by_alias = True
class AggregatedStats(BaseModel): class AggregatedStats(BaseModel):

@ -649,6 +649,7 @@ class SearchService:
results.append( results.append(
{ {
"_id": doc.get("_id"), "_id": doc.get("_id"),
# "id": doc.get("_id"), # Optional: add for debugging
"text": text, "text": text,
"snippet": snippet, "snippet": snippet,
"snippet_long": snippet_long, "snippet_long": snippet_long,

Loading…
Cancel
Save