You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

74 lines
2.4 KiB

from _arango import arango
from _chroma import ChromaDB
from langchain_text_splitters import CharacterTextSplitter
from print_color import *
from _llm import LLM
def truncate():
    """Empty the ``other_persons`` and ``all_relations`` collections."""
    for collection_name in ("other_persons", "all_relations"):
        arango.db.collection(collection_name).truncate()
def clear_info_persons():
    """Delete unconfirmed persons and reset extracted fields on the rest.

    Every document in ``persons`` whose ``confirmed`` value is falsy is
    deleted. For all other documents ``info`` and
    ``mentioned_in_interrogation`` are set to empty lists and
    ``mentioned_as`` to an empty dict, and the document is written back
    with ``merge=False``.
    """
    persons_collection = arango.db.collection("persons")
    # Materialise the result first so the loop does not read from the
    # collection while documents are being deleted/updated.
    for doc in list(persons_collection.all()):
        if doc['confirmed']:
            # NOTE(review): "mentioned_as" is reset to a dict here, whereas
            # clean_mentioned_as() stores a list under the same key —
            # confirm the intended type.
            doc.update({
                "info": [],
                "mentioned_in_interrogation": [],
                "mentioned_as": {},
            })
            persons_collection.update(doc, merge=False)
        else:
            persons_collection.delete(doc)
def clear_changer_interrogations():
    """Reset ``mentioned_persons`` to an empty list on every interrogation.

    Each document in ``interrogations`` is written back with ``merge=False``.
    """
    interrogations_collection = arango.db.collection("interrogations")
    for record in list(interrogations_collection.all()):
        record["mentioned_persons"] = []
        interrogations_collection.update(record, merge=False)
def clean_mentioned_as():
    """Remove duplicate entries from each person's ``mentioned_as``.

    For every document in ``persons`` that has a ``mentioned_as`` key, the
    value is replaced by a list of its distinct elements in first-seen
    order, ``info`` is reset to an empty list, and the document is written
    back with ``merge=False``.
    """
    # NOTE(review): the nesting of the final ``info`` reset and ``update``
    # call was reconstructed; they are treated as part of the
    # ``"mentioned_as" in person`` branch, so documents without that key are
    # left untouched — confirm against the original layout.
    persons_collection = arango.db.collection("persons")
    for doc in list(persons_collection.all()):
        if "mentioned_as" not in doc:
            continue

        # Membership test on a list (rather than a set) so that unhashable
        # elements are still handled.
        distinct = []
        for alias in doc["mentioned_as"]:
            if alias in distinct:
                continue
            distinct.append(alias)

        doc["mentioned_as"] = distinct
        doc['info'] = []
        persons_collection.update(doc, merge=False)
# Module-level character splitter: literal "\n\n" separator
# (is_separator_regex=False), chunk_size=1000, chunk_overlap=100, with
# lengths measured by the built-in ``len``.
# NOTE(review): not referenced anywhere in the visible part of this file —
# confirm whether it is still needed.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
# Script body: for each interrogation that has no ``person_mentioned_as`` yet,
# ask the LLM which name the transcript uses for the interrogated person and
# store the answer on the interrogation document.
db = arango.db

# Interrogations where ``person_mentioned_as`` is null (full documents).
interrogations = list(db.aql.execute('for doc in interrogations filter doc.person_mentioned_as == null return doc'))
# Rows from ``pms`` are projected to ``_id`` and ``page`` only.
pms = list(db.aql.execute('for doc in pms return {"_id": doc._id, "page": doc.page}'))

# Process everything in page order; every row must therefore carry "page".
interrogations = interrogations + pms
interrogations.sort(key=lambda x: x['page'])

for i in interrogations:
    # Rows without text (which includes every ``pms`` row, given the
    # projection above) have nothing to send to the LLM.
    if 'text' not in i:
        continue

    # Build the LLM client only for rows that are actually processed, rather
    # than once per row including the skipped ones.
    llm = LLM(chat=False)

    # Only the first 1000 characters of the transcript are sent.
    text = i['text'][:1000]
    print_purple(text)
    name = i['name']
    # NOTE(review): "så vara med det" in the prompt looks like a typo for
    # "så svara med det" — confirm before changing the prompt text.
    prompt = f'''Nedan är ett förhör med {name}: \n\n\n{text}\n\n\nOm du ser till själva förhöret, vilket namn används för {name}? Om personen exempelvis bara skrivs ut med förnamn så vara med det. Svara ENBART med namnet, inget annat.'''
    answer = llm.generate(prompt)

    # Store the raw LLM answer; the write skips the revision check.
    i['person_mentioned_as'] = answer
    db.collection('interrogations').update(i, check_rev=False)