You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

73 lines
2.8 KiB

from fastapi import FastAPI, BackgroundTasks, Request
from fastapi.responses import JSONResponse
import logging
from prompts import get_summary_prompt
from _llm import LLM
from _arango import ArangoDB
# Application object; the route handler below registers itself on it via @app.post.
app = FastAPI()
# Configure logging at INFO level so the info-level messages logged by this module are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@app.post("/summarise_document")
async def summarize_document(request: Request, background_tasks: BackgroundTasks):
    """Accept a summarisation request and queue the work as a background task.

    The JSON body of ``request`` is read, its ``text``, ``arango_db_name`` and
    ``arango_id`` fields are whitespace-stripped (missing fields become ``""``),
    ``is_sci`` defaults to ``False``, and the cleaned payload is handed to
    ``summarise_document_task`` through ``background_tasks``.

    Returns:
        A confirmation dict once the task is queued, or a ``JSONResponse``
        with status 500 if anything raised while reading or cleaning the body.
    """
    try:
        payload = await request.json()
        logger.info(f"Received data: {payload}")

        # Normalise the string fields in a fixed order; absent keys become "".
        for field in ("text", "arango_db_name", "arango_id"):
            payload[field] = payload.get(field, "").strip()
        # Keep a caller-supplied flag, otherwise fall back to False.
        payload.setdefault("is_sci", False)

        background_tasks.add_task(summarise_document_task, payload)
    except Exception as exc:
        logger.error(f"Error in summarize_document: {exc}")
        return JSONResponse(
            status_code=500,
            content={"detail": "An unexpected error occurred."},
        )

    return {"message": "Document summarization has started."}
def summarise_document_task(doc_data: dict):
    """Summarise a document with the LLM and store the result on the document.

    Args:
        doc_data: Request payload. The keys read here are ``arango_id``
            (document id of the form ``"<collection>/<key>"``), ``text``
            (the text to summarise), ``is_sci`` (optional flag, defaults to
            ``False``) and ``arango_db_name`` (passed to ``ArangoDB``).

    The system message is chosen from the collection prefix of ``arango_id``:
    ``interviews`` -> interview transcripts, ``sci_articles`` (or a truthy
    ``is_sci``) -> scientific articles, anything else -> a generic document.
    The summary, together with the model name and temperature reported by the
    ``LLM`` instance, is written under the document's ``summary`` field via
    ``arango.db.update_document``.

    Runs as a best-effort background task: every exception is caught and
    logged (with the document id and traceback); nothing is raised or returned.
    """
    # Bound before the try block so the error handler can always report it.
    _id = None
    try:
        _id = doc_data.get("arango_id")
        text = doc_data.get("text")
        is_sci = doc_data.get("is_sci", False)

        # Without an id there is nothing to update, so fail before spending
        # an LLM call on a summary that could never be stored.
        if not _id:
            raise ValueError("arango_id is missing or empty")

        # The collection name is the part of the id before the first slash.
        collection = _id.split("/")[0]
        if collection == "interviews":
            subject = "interview transcripts"
        elif is_sci or collection == "sci_articles":
            subject = "scientific articles"
        else:
            subject = "a document"
        system_message = (
            f"You are summarising {subject}. It is very important that you "
            "keep to what is written and do not add any of your own opinions "
            "or interpretations. Always answer in English."
        )

        llm = LLM(system_message=system_message)
        prompt = get_summary_prompt(text, is_sci)
        summary = llm.generate(query=prompt)

        summary_doc = {
            "text_sum": summary,
            "meta": {
                "model": llm.model,
                "temperature": llm.options["temperature"],
            },
        }

        arango = ArangoDB(db_name=doc_data.get("arango_db_name"))
        arango.db.update_document(
            {"summary": summary_doc, "_id": _id},
            silent=True,
            check_rev=False,
        )
    except Exception as e:
        # Log the actual document id (the original interpolated the builtin
        # ``id`` function here) and keep the traceback for diagnosis.
        logger.error(f"_id: {_id}")
        logger.exception(f"Error in summarise_document_task: {e}")
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the module is run as a script.
    from uvicorn import run as serve

    # Serve the app on all interfaces, port 8100.
    serve(app, host="0.0.0.0", port=8100)