You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
2.4 KiB
74 lines
2.4 KiB
from _arango import arango |
|
from _chroma import ChromaDB |
|
from langchain_text_splitters import CharacterTextSplitter |
|
from print_color import * |
|
from _llm import LLM |
|
|
|
def truncate():
    """Call ``truncate()`` on the ``other_persons`` and ``all_relations`` collections."""
    for collection_name in ("other_persons", "all_relations"):
        arango.db.collection(collection_name).truncate()
|
|
|
|
|
def clear_info_persons():
    """Delete unconfirmed persons and reset extracted fields on the rest.

    Every document in ``persons`` whose ``confirmed`` value is falsy is
    deleted. On all other documents ``info`` and
    ``mentioned_in_interrogation`` are set to empty lists and
    ``mentioned_as`` to an empty dict before the document is written back.

    Raises:
        KeyError: If a person document has no ``confirmed`` field.
    """
    collection = arango.db.collection("persons")
    # list() loads every document before the first delete/update is issued.
    for person in list(collection.all()):
        if person['confirmed']:
            person["info"] = []
            person["mentioned_in_interrogation"] = []
            person["mentioned_as"] = {}
            # NOTE(review): merge=False presumably makes the empty dict replace
            # the stored sub-document rather than being merged into it --
            # confirm against the python-arango documentation.
            collection.update(person, merge=False)
        else:
            collection.delete(person)
|
|
|
|
|
def clear_changer_interrogations():
    """Set ``mentioned_persons`` to an empty list on every interrogation.

    Each document in ``interrogations`` is loaded, modified in memory and
    written back with ``update(..., merge=False)``.
    """
    collection = arango.db.collection("interrogations")
    # list() loads every document before the first update is issued.
    for document in list(collection.all()):
        document["mentioned_persons"] = []
        collection.update(document, merge=False)
|
|
|
|
|
def clean_mentioned_as():
    """Deduplicate ``mentioned_as`` on person documents and reset ``info``.

    For each document in ``persons`` that has a ``mentioned_as`` field, the
    field is replaced by a list of its distinct entries in first-seen order
    (iterating a dict yields its keys), ``info`` is set to an empty list and
    the document is written back with ``merge=False``.
    """
    collection = arango.db.collection("persons")
    for person in list(collection.all()):
        if "mentioned_as" not in person:
            # NOTE(review): this file was recovered without indentation; it is
            # assumed that documents lacking ``mentioned_as`` are left
            # untouched (no ``info`` reset, no update) -- confirm.
            continue

        unique_entries = []
        for entry in person["mentioned_as"]:
            # List membership keeps this working for unhashable entries.
            if entry in unique_entries:
                continue
            unique_entries.append(entry)

        person["mentioned_as"] = unique_entries
        person['info'] = []
        collection.update(person, merge=False)
|
|
|
|
|
|
|
# Module-level splitter: literal "\n\n" separator (not a regex), chunk_size of
# 1000 with an overlap of 100, both measured with len().
# It is not referenced anywhere else in the visible part of this file.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    is_separator_regex=False,
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
)
|
|
|
|
|
# Script body: for every interrogation that has no ``person_mentioned_as``
# value yet, ask the LLM which name the interrogation text itself uses for the
# interrogated person and store the answer on the document.
db = arango.db

# Interrogations whose person_mentioned_as is still null.
interrogations = list(
    db.aql.execute(
        'for doc in interrogations filter doc.person_mentioned_as == null return doc'
    )
)
# pms documents are projected to _id and page only, so they carry no 'text'
# and are skipped by the loop below.
pms = list(db.aql.execute('for doc in pms return {"_id": doc._id, "page": doc.page}'))
interrogations = interrogations + pms

# Process documents in page order.
interrogations.sort(key=lambda x: x['page'])

interrogations_collection = db.collection('interrogations')

for interrogation in interrogations:
    # Skip documents without usable text (missing, None or empty) before
    # doing any LLM work.
    text = interrogation.get('text')
    if not text:
        continue

    # Only the first 1000 characters are sent to the model.
    text = text[:1000]
    print_purple(text)
    name = interrogation['name']

    # A fresh LLM instance is created for each document.
    llm = LLM(chat=False)
    prompt = f'''Nedan är ett förhör med {name}: \n\n\n{text}\n\n\nOm du ser till själva förhöret, vilket namn används för {name}? Om personen exempelvis bara skrivs ut med förnamn så svara med det. Svara ENBART med namnet, inget annat.'''
    answer = llm.generate(prompt)

    interrogation['person_mentioned_as'] = answer
    # check_rev=False: write back without comparing document revisions.
    interrogations_collection.update(interrogation, check_rev=False)
|
|
|
|