from _arango import arango
from _chroma import ChromaDB
from langchain_text_splitters import CharacterTextSplitter
from print_color import *
from _llm import LLM


def truncate():
    """Empty the ``other_persons`` and ``all_relations`` collections."""
    arango.db.collection("other_persons").truncate()
    arango.db.collection("all_relations").truncate()


def clear_info_persons():
    """Delete unconfirmed persons and reset the mention fields on the rest.

    Every document in ``persons`` whose ``confirmed`` value is falsy is
    deleted. For the remaining documents ``info`` and
    ``mentioned_in_interrogation`` are reset to empty lists and
    ``mentioned_as`` to an empty dict.

    Raises:
        KeyError: If a person document has no ``confirmed`` key.
    """
    persons_col = arango.db.collection("persons")
    # Materialise the cursor first: the loop deletes/updates the same
    # collection it is reading from.
    persons = list(persons_col.all())
    for person in persons:
        if not person['confirmed']:
            persons_col.delete(person)
            continue
        person["info"] = []
        person["mentioned_in_interrogation"] = []
        person["mentioned_as"] = {}
        # merge=False: the (empty) sub-document should replace the stored
        # one rather than be merged into it.
        persons_col.update(person, merge=False)


def clear_changer_interrogations():
    """Reset ``mentioned_persons`` to an empty list on every interrogation."""
    interrogations_col = arango.db.collection("interrogations")
    interrogations = list(interrogations_col.all())
    for interrogation in interrogations:
        interrogation["mentioned_persons"] = []
        interrogations_col.update(interrogation, merge=False)


def clean_mentioned_as():
    """De-duplicate ``mentioned_as`` and clear ``info`` for every person.

    Duplicates are removed while keeping first-seen order. A membership
    test on a list is used (rather than a set) so that entries do not need
    to be hashable. If ``mentioned_as`` is a dict, iterating it yields its
    keys, so the stored value becomes a list of those keys.
    """
    persons_col = arango.db.collection("persons")
    persons = list(persons_col.all())
    for person in persons:
        if "mentioned_as" in person:
            mentioned_as = []
            for entry in person["mentioned_as"]:
                if entry not in mentioned_as:
                    mentioned_as.append(entry)
            person["mentioned_as"] = mentioned_as
        # NOTE(review): the original file had lost its indentation; this
        # reset is assumed to apply to every person, not only to those that
        # have a "mentioned_as" field -- confirm.
        person['info'] = []
        persons_col.update(person, merge=False)


text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

# --- Script: ask the LLM which name each interrogation uses for its subject.

db = arango.db

# Only interrogations that have not been labelled yet.
interrogations = list(db.aql.execute('for doc in interrogations filter doc.person_mentioned_as == null return doc'))
# The pms rows are projected to _id/page only, so they carry no 'text' and
# are skipped by the guard in the loop below.
pms = list(db.aql.execute('for doc in pms return {"_id": doc._id, "page": doc.page}'))
interrogations = interrogations + pms
interrogations.sort(key=lambda x: x['page'])

for i in interrogations:
    if 'text' not in i:
        continue
    # Build the LLM client only for documents that are actually processed;
    # previously one was constructed (and discarded) for every skipped row.
    llm = LLM(chat=False)
    # Only the first 1000 characters of the interrogation go into the prompt.
    text = i['text'][:1000]
    print_purple(text)
    name = i['name']
    # NOTE(review): "vara med det" in the prompt looks like a typo for
    # "svara med det"; left unchanged because it alters the text sent to the
    # model -- confirm before fixing.
    prompt = f'''Nedan är ett förhör med {name}: \n\n\n{text}\n\n\nOm du ser till själva förhöret, vilket namn används för {name}? Om personen exempelvis bara skrivs ut med förnamn så vara med det. 
Svara ENBART med namnet, inget annat.'''
    answer = llm.generate(prompt)
    i['person_mentioned_as'] = answer
    # check_rev=False: write the label without a revision comparison.
    db.collection('interrogations').update(i, check_rev=False)