from _chroma import ChromaDB
from _arango import arango
from _llm import LLM
from pprint import pprint
from print_color import *
import multiprocessing


def _excluded_names(person):
    """Return the flat list of names stored under ``person["is_not"]``.

    Entries that are themselves lists/tuples are flattened, so documents
    holding nested entries (e.g. ``[["Name"]]``) still yield a plain list
    of names usable in a ``$nin`` filter.
    """
    names = []
    for entry in person.get("is_not", []):
        if isinstance(entry, (list, tuple)):
            names.extend(entry)
        else:
            names.append(entry)
    return names


def find_person(person):
    """
    Find the closest known person for an "other" person and ask the LLM
    whether they are the same individual.

    The person's name is used as a query against the "mala_persons" Chroma
    collection (one result, excluding names listed in ``person["is_not"]``).
    If the best hit has a distance of at most 1, the matching ArangoDB
    document is summarised by the LLM, and for every interrogation listed in
    ``person["mentioned_in_interrogation"]`` the LLM is asked whether the two
    names refer to the same person.

    Args:
        person (dict): Person document. Reads the keys "name",
            "mentioned_in_interrogation" and, optionally, "is_not".

    Returns:
        list: One tuple per interrogation, or an empty list when there is no
        hit or the best hit has distance > 1. Each tuple contains:
            - answer (str): The LLM's answer.
            - found_person_in_arango (dict): Matched document from "persons".
            - interrogation_doc (dict): Document from "interrogations".
            - other_person (str): The name that was searched for.
            - found_person (str): The document text of the Chroma hit.
            - found_person_info (str): The matched person's "info" entries
              joined with newlines.
            - person (dict): The original person document passed in.
    """
    db = arango.db
    llm = LLM()
    other_person = person["name"]

    chroma = ChromaDB()
    col = chroma.client.get_or_create_collection("mala_persons")

    # The exclusion list lives on the person document, not on the name string.
    excluded = _excluded_names(person)
    where = {"name": {"$nin": excluded}} if excluded else None

    # Do a query to find the person
    hits = col.query(query_texts=[other_person], n_results=1, where=where)
    if not hits["documents"] or not hits["documents"][0]:
        # Nothing matched (e.g. every candidate was excluded by the filter).
        return []

    found_person = hits["documents"][0][0]
    found_person_key = hits["metadatas"][0][0]["_key"]
    distance = hits["distances"][0][0]

    # Filter out hits with distance > 1
    if distance > 1:
        return []

    found_person_in_arango = db.collection("persons").get(found_person_key)
    found_person_info = "\n".join(found_person_in_arango["info"])

    # Write summary about the person
    prompt = (
        "Nedan är olika bitar med information om en person:\n\n"
        f"{found_person_info}\n\n"
        "Sammanfatta dessa på ett detaljerat sätt, var noga med namn, platser, händelser och relationer. "
        "Använd bara sånt som finns i informationen. "
        "Svara ENBART med sammanfattningen, ingenting annat. \n"
    )
    person_in_arango_summary = llm.generate(prompt)

    output = []
    for interrogation in person["mentioned_in_interrogation"]:
        interrogation_doc = db.collection("interrogations").get(interrogation)
        text = interrogation_doc["text"]
        prompt = (
            f'I texten nedan omnämns en "{other_person}" och jag försöker förstå om det kan vara '
            "exempelvis ett felstavat namn eller smeknamn för en annan person.\n"
            f' TEXT: """{text}"""\n'
            f' På andra ställen i polisens förundersökning finns en person som heter "{found_person}", '
            f'och som beskrivs så här: """{person_in_arango_summary}"""\n'
            f" Verkar det troligt att personen som kallas {other_person} är samma person som {found_person}? "
            "Svara bara JA eller NEJ, samt en kort förklaring till varför. "
        )
        # Disabled prompt sentence (translated from Swedish): "If instead the
        # first name or last name is completely different, it is probably not
        # the same person. If there is only one name (no surname), it may also
        # be a nickname or a description."
        answer = llm.generate(prompt)
        output.append(
            (
                answer,
                found_person_in_arango,
                interrogation_doc,
                other_person,
                found_person,
                found_person_info,
                person,
            )
        )

    return output


def verify(
    db,
    answer=None,
    person=None,
    person_in_arango=None,
    interrogation_key=None,
):
    """
    Apply a verification decision about whether ``person`` is the same
    individual as ``person_in_arango`` in a given interrogation.

    Args:
        db: The database object (used via ``db.collection(...)``).
        answer (str): "Yes", "No" or "Unknown"; any other value only prints.
        person (dict): The person document being checked.
        person_in_arango (dict): The candidate person document.
        interrogation_key (str): Key of the interrogation concerned.

    Returns:
        None

    Raises:
        ValueError: If ``answer`` is "Yes" and ``interrogation_key`` is not
            in ``person["mentioned_in_interrogation"]``.
    """
    print_blue("Answer:", answer)

    if answer == "Yes":
        # Move the interrogation reference from `person` to the matched person.
        person["mentioned_in_interrogation"].remove(interrogation_key)
        person_in_arango["confirmed"] = True
        db.collection("persons").update(person)

        person_in_arango["info"] += person["info"]
        # Record the interrogation key itself (not the field name).
        person_in_arango["mentioned_in_interrogation"].append(interrogation_key)

        print("Updated person in arango:")
        pprint(
            db.collection("persons").insert(person_in_arango, overwrite_mode="update")
        )

        # Once `person` is no longer mentioned anywhere and is a different
        # document than the match, archive it and drop it from "persons".
        if (
            person["mentioned_in_interrogation"] == []
            and person["_key"] != person_in_arango["_key"]
        ):
            db.collection("other_persons").insert(person, overwrite=True)
            db.collection("persons").delete(person, check_rev=False)
            print(f"Removed {person}")

    elif answer == "No":
        # Remember the rejected name as a plain string so the list can be
        # used directly in a `$nin` filter.
        rejected = person.setdefault("is_not", [])
        if person_in_arango["name"] not in rejected:
            rejected.append(person_in_arango["name"])
        db.collection("persons").update(person, merge=True, check_rev=False)

    elif answer == "Unknown":
        # NOTE(review): the whole person document is stored under "name" —
        # confirm whether person["name"] was intended.
        db.collection("unknown").insert(
            {"name": person, "interrogation": interrogation_key}, overwrite=True
        )


if __name__ == "__main__":
    db = arango.db
    q = "for doc in persons filter doc.other == true return doc"
    other_persons = list(db.aql.execute(q))
    for person in other_persons:
        print(find_person(person))
        # NOTE(review): stops after the first person — looks like a debugging
        # stop; confirm whether it should sit after the loop or be removed.
        exit()

    # with multiprocessing.Pool() as pool:
    #     pool.map(find_person, other_persons)