You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
335 lines
11 KiB
335 lines
11 KiB
import streamlit as st |
|
from identify_person import identify, verify, find_person, UnverifiedPerson, FoundPerson |
|
from _arango import arango |
|
import re |
|
from fuzzywuzzy import process |
|
from _llm import LLM as LLM_garda |
|
from _openai import LLM_OpenAI as LLM |
|
from print_color import * |
|
from random import randint |
|
|
|
|
|
# from print_color import * |
|
print("Start") |
|
|
|
|
|
def reset_choices():
    """Clear the three per-question answer slots held in ``st.session_state``.

    After the call ``user_choice``, ``unconfirmed_choice`` and
    ``custom_choice`` are all ``None``.
    """
    for slot in ("user_choice", "unconfirmed_choice", "custom_choice"):
        st.session_state[slot] = None
|
|
|
|
|
def check_if_dict_in_list(target_dict, list_of_dicts):
    """Return True if a dict in *list_of_dicts* starts with the same item as *target_dict*.

    Only the first ``(key, value)`` pair of each dict is compared, so this is
    meant for single-item dicts such as ``{name: interrogation_key}``.

    Args:
        target_dict: Dict whose first item is searched for.
        list_of_dicts: Iterable of dicts to search.

    Returns:
        True when some entry's first item has an equal key and an equal value,
        otherwise False. An empty *target_dict* never matches, and empty
        entries in *list_of_dicts* are skipped instead of raising IndexError.
    """
    first_target = next(iter(target_dict.items()), None)
    if first_target is None:
        # Nothing to look for: an empty dict has no first item.
        return False
    target_key, target_value = first_target

    for candidate in list_of_dicts:
        first_item = next(iter(candidate.items()), None)
        if first_item is None:
            continue
        key, value = first_item
        if key == target_key and value == target_value:
            return True
    return False
|
|
|
|
|
def submitted():
    """Callback that records in the session state that the form was submitted."""
    st.session_state["next"] = True
|
|
|
|
|
@st.cache_data()
def sort_names_by_similarity(target_name, name_list):
    """Return the names in *name_list* ordered by similarity to *target_name*.

    Scores come from ``fuzzywuzzy.process.extract``; the best match is first.
    The result is cached by Streamlit per argument combination.
    """
    # limit=len(name_list) asks for every candidate, each as a (name, score) tuple.
    ranked = sorted(
        process.extract(target_name, name_list, limit=len(name_list)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [candidate for candidate, _score in ranked]
|
|
|
|
|
@st.cache_data()
def get_persons():
    """Return every document of the ``persons`` collection as a list (cached)."""
    persons_collection = arango.db.collection("persons")
    return list(persons_collection.all())
|
|
|
|
|
@st.cache_data()
def get_unverified_persons():
    """Return all ``persons`` documents whose ``confirmed`` field is not ``true`` (cached).

    Uses the module-level ``db`` handle, which must be assigned before the
    first call.
    """
    cursor = db.aql.execute(
        "for doc in persons filter doc.confirmed != true return doc"
    )
    return list(cursor)
|
|
|
|
|
def get_suggestions(person):
    """Run ``identify`` for *person* and publish the result in ``st.session_state``.

    Writes three session-state entries taken from the ``identify`` result:
    ``unverified_person``, ``found_person`` and ``suggestions``.

    This function is deliberately NOT wrapped in ``st.cache_data``. It returns
    nothing and exists only for its session-state side effects; a cache hit
    would skip the body entirely, leaving the current session without
    ``suggestions`` (the cache is shared between sessions, so a second session
    asking about the same person would hit it).

    Args:
        person: The unverified person as passed on to ``identify`` (callers in
            this file pass ``UnverifiedPerson.__dict__``).
    """
    result = identify(person)
    st.session_state.unverified_person = result["unverified_person"]
    st.session_state.found_person = result["found_person"]
    st.session_state.suggestions = result["suggestions"]
|
|
|
|
|
def caps(string):
    """Format a person name for display.

    Names carrying a ``*`` marker are returned with every ``*`` removed and
    their case untouched; names without the marker are upper-cased.
    """
    if "*" in string:
        return string.replace("*", "")
    return string.upper()
|
|
|
|
|
def get_unverified_person():
    """Pop a random pending document and make it the current unverified person.

    One randomly chosen entry is removed from
    ``st.session_state.unverified_persons``, wrapped in ``UnverifiedPerson``
    and stored as ``st.session_state.unverified_person``.

    When the queue is empty, a "no more persons to verify" message is rendered
    and the script run is halted with ``st.stop()``.

    The emptiness check is explicit rather than relying on the ``ValueError``
    that ``randint(0, -1)`` raises, so that a ``ValueError`` coming from
    ``UnverifiedPerson(doc)`` is no longer misreported as an empty queue.
    """
    pending = st.session_state.unverified_persons
    if not pending:
        st.markdown(":green[Inga fler personer att verifiera.]")
        st.stop()
        return  # st.stop() halts the run; the return only makes the guard explicit.

    doc = pending.pop(randint(0, len(pending) - 1))
    st.session_state.unverified_person = UnverifiedPerson(doc)
|
|
|
|
|
# Browser-tab title for the app.
st.set_page_config(page_title="Malå")

# Optional "person_key" URL parameter; None when it is not present.
params = st.query_params
param_person_key = params.get("person_key")

# Short handle for the ArangoDB database used throughout the script.
db = arango.db
|
|
|
# Seed the session state on the first run of a session: the "next" flag and
# the person lookups used by the selection widgets.
if "next" not in st.session_state:
    st.session_state.next = False

if "persons" not in st.session_state:
    st.session_state.persons = get_persons()

    # Display names: persons that are not confirmed get a trailing "*".
    # ``.get`` is used because a document may lack the "confirmed" field
    # altogether (persons inserted by this script are stored without it);
    # indexing with ["confirmed"] would raise KeyError for those.
    st.session_state.persons_names = [
        person["name"] if person.get("confirmed") else person["name"] + "*"
        for person in st.session_state.persons
    ]

    # Plain name (no "*") -> document key.
    st.session_state.persons_dict = {
        person["name"]: person["_key"] for person in st.session_state.persons
    }
|
|
|
# Build the queue of persons to verify once per session.
if "unverified_persons" not in st.session_state:
    if not param_person_key:
        st.session_state.unverified_persons = get_unverified_persons()
    else:
        # A person key in the URL narrows the queue to that single document.
        single_person_cursor = db.aql.execute(
            "for doc in persons filter doc._key == @key return doc",
            bind_vars={"key": param_person_key},
        )
        st.session_state.unverified_persons = list(single_person_cursor)
        print_blue("PARAM_PERSON_KEY", st.session_state.unverified_persons)

# Fallback list of selectable names when none has been stored yet.
if "persons_names" not in st.session_state:
    unconfirmed_lookup = arango.get_persons(confirmed=False)
    st.session_state.persons_names = unconfirmed_lookup["names"]
|
|
|
# Make sure the three answer slots exist; an existing value is left alone.
for choice_slot in ("user_choice", "unconfirmed_choice", "custom_choice"):
    if choice_slot not in st.session_state:
        st.session_state[choice_slot] = None
|
|
|
# Pick a person to verify only when this session does not have one yet.
#
# The queue being empty is NOT a reason to pick again: get_unverified_person()
# pops the chosen person off the queue, so the queue is legitimately empty
# while the last person is still being handled (always the case when a single
# person is selected via the URL). Calling it then would render the
# "no more persons" message and stop the script before the submitted form
# could be processed.
if "unverified_person" not in st.session_state:
    get_unverified_person()
|
|
|
# Default for the suggested match until get_suggestions() provides one.
if "found_person" not in st.session_state:
    st.session_state["found_person"] = None

# First run for this person: get_suggestions() stores unverified_person,
# found_person and suggestions in the session state.
if "suggestions" not in st.session_state:
    current_person_fields = st.session_state.unverified_person.__dict__
    get_suggestions(current_person_fields)
    print_yellow("SUGGESTIONS", st.session_state.suggestions)

# Take the first queued suggestion as the one currently shown.
if "suggestion" not in st.session_state:
    st.session_state["suggestion"] = st.session_state.suggestions.pop(0)
|
|
|
|
|
# Current person pair, plus the answer and interrogation of the suggestion
# under review.
unverified_person: UnverifiedPerson = st.session_state.unverified_person
found_person: FoundPerson = st.session_state.found_person

current_suggestion = st.session_state.suggestion
answer = current_suggestion[0]
interrogation_doc = current_suggestion[1]

# A string in the second slot is looked up in the "interrogations" collection
# to obtain the document itself.
if isinstance(interrogation_doc, str):
    interrogations = db.collection("interrogations")
    interrogation_doc = interrogations.get(interrogation_doc)

text = interrogation_doc["text"]
|
|
|
|
|
# Intro line: how many interrogations mention this name.
mention_count = len(st.session_state.unverified_person.mentioned_in_interrogation)
st.markdown(
    f'Namnet **"{st.session_state.unverified_person.name}"** används i **{mention_count}** förhör. Namnet kan syfta på olika personer i olika sammanhang så vi går igenom förhören ett och ett.'
)

if not answer:
    # No suggested match: just ask who the person is.
    st.markdown(f"Vem är :blue[{unverified_person.name}]?")
    st.write(f'(från förhör med {interrogation_doc["person"]})')
else:
    # A match was suggested: ask whether the two are the same person and show
    # the answer text, coloured by its JA/NEJ verdict.
    answer = answer.replace("\n", " ")
    st.markdown(
        f"Är :blue[{unverified_person.name}] samma som :blue[{found_person.name}]?"
    )
    print(found_person.__dict__)
    st.write(f'(från förhör med {interrogation_doc["person"]})')

    # radio_index preselects the option matching the verdict
    # (0 = first option, 1 = second option, None = nothing preselected).
    if "JA" in answer:
        radio_index = 0
        verdict_line = f"🤖\n:green[{answer.replace('JA ', '')}]"
    elif "NEJ" in answer:
        radio_index = 1
        verdict_line = f"🤖\n:red[{answer.replace('NEJ ', '')}]"
    else:
        radio_index = None
        verdict_line = f"🤖\n{answer}"
    st.markdown(verdict_line)
|
|
|
|
|
# Collapsible view of the interrogation text, reformatted for markdown/HTML.
with st.expander("Mer information om förhöret"):
    # Put the FL/DH speaker labels on their own line, in bold.
    for speaker in ("FL", "DH"):
        text = text.replace(f"\n{speaker}:", f"<br>**{speaker}:** ")

    # Drop every newline that is not followed by another newline, then
    # collapse any remaining run of newlines into one.
    text = re.sub(r"\n(?!\n)", "", text)
    text = re.sub(r"\n\n+", "\n", text)

    # Remaining newlines become HTML breaks; the person's name is highlighted.
    highlighted_name = f"**:red[{unverified_person.name}]**"
    text = text.replace("\n", "<br>")
    text = text.replace(unverified_person.name, highlighted_name)

    st.markdown(f"##### Förhöret:\n{text}", unsafe_allow_html=True)
|
|
|
# Form with three ways to answer: yes/no/unknown on the suggested match, an
# existing person picked from the list, or a free-text name.
with st.form("select_alternative"):

    # The radio is only shown when there is a suggested match to judge.
    st.session_state.user_choice = (
        st.radio(
            "Select alternative",
            ("Ja", "Nej", "Vet ej"),
            key="user_choice_radio",
            index=radio_index,
        )
        if answer
        else None
    )

    # Existing persons, most similar name first.
    alternatives = sort_names_by_similarity(
        unverified_person.name, st.session_state.persons_names
    )
    picked_existing = st.selectbox(
        "Välj någon som stämmer",
        alternatives,
        placeholder="Sök en annan",
        index=None,
        key="multiselect",
        format_func=caps,
        help="Personer i caps är bekräftade personer, välj någon av dem om det verkar stämma.",
    )
    st.session_state.unconfirmed_choice = picked_existing

    # Free-text name; an empty input is stored as None.
    typed_name = st.text_input(
        "Annan person",
        key="custom",
        help="Skriv in namnet på personen om det inte finns i listan. Var noga med stavningen.",
    )
    st.session_state.custom_choice = typed_name if typed_name != "" else None

    # The button's return value tells whether the form was submitted this run.
    st.session_state.next = st.form_submit_button("Nästa", on_click=submitted)
|
|
|
# Handle a submitted form. The answers are checked in priority order:
# free-text name, then a person picked from the list, then the Ja/Nej/Vet ej
# radio. Afterwards the script moves on to the next suggestion or person.
if st.session_state.next:
    interrogation_key = interrogation_doc["_key"]

    if st.session_state.custom_choice:
        # Free-text name: summarise the interrogation with the LLM and store a
        # new person document carrying that summary.
        print("CUSTOM CHOICE", st.session_state.custom_choice)
        llm = LLM()
        info = llm.generate(
            f'Nedan är ett polisförhör där en person omnämns som "{unverified_person.name}".\n\n{interrogation_doc["text"]}\n\nSammanfatta informationen om {unverified_person.name} på ett detaljerat sätt, var noga med namn, platser, händelser och relationer. Använd bara sånt som finns i informationen. Svara ENBART med sammanfattningen, ingenting annat. '
        )
        person_in_arango = db.collection("persons").insert(
            {
                "_key": arango.fix_key_name(st.session_state.custom_choice),
                "name": st.session_state.custom_choice,
                "info": [info],
                "mentioned_in_interrogation": [interrogation_key],
                "mentioned_as": [{unverified_person.name: interrogation_key}],
            }
        )
        # NOTE(review): verify() is given the previously suggested
        # found_person, not the person inserted just above (person_in_arango
        # is unused), and found_person may be None when no match was
        # suggested. Confirm the intended target against verify().
        verify(
            db,
            "Yes",
            unverified_person.doc,
            found_person.doc,
            interrogation_key=interrogation_key,
        )

    elif st.session_state.unconfirmed_choice:
        # Existing person picked from the list; drop the "*" display marker
        # to get back the name used as key in persons_dict.
        unconfirmed_choice = st.session_state.unconfirmed_choice.replace(
            "*", ""
        ).strip()
        print_yellow("OTHER CHOICE", unconfirmed_choice)

        found_person = FoundPerson(
            db, unconfirmed_choice, st.session_state.persons_dict[unconfirmed_choice]
        )
        print("NEW:", found_person.name)

        # NOTE(review): this call passes the person objects, while every other
        # verify() call in this handler passes their ``.doc``. Confirm which
        # form verify() expects.
        verify(db, "Yes", unverified_person, found_person, interrogation_key)

    elif st.session_state.user_choice == "Ja":
        # The suggested match was confirmed: record under which name the found
        # person was mentioned in this interrogation (once), then verify.
        print("USER CHOICE", st.session_state.user_choice)
        mention = {unverified_person.name: interrogation_key}
        if "mentioned_as" not in found_person.doc:
            found_person.doc["mentioned_as"] = []
        if not check_if_dict_in_list(mention, found_person.doc["mentioned_as"]):
            found_person.doc["mentioned_as"].append(mention)
        # Same keyword names as the "Nej" / "Vet ej" calls below.
        verify(
            db,
            "Yes",
            unverified_person=unverified_person.doc,
            found_person=found_person.doc,
            interrogation_key=interrogation_key,
        )

    elif st.session_state.user_choice == "Nej":
        verify(
            db,
            "No",
            unverified_person=unverified_person.doc,
            found_person=found_person.doc,
            interrogation_key=interrogation_key,
        )

    elif st.session_state.user_choice == "Vet ej":
        verify(
            db,
            "Unknown",
            unverified_person=unverified_person.doc,
            interrogation_key=interrogation_key,
        )

    reset_choices()

    if param_person_key:
        # Single-person mode (person key given in the URL): nothing follows.
        st.markdown(":green[Tack!] Du kan stäna de här fliken nu.")
        st.stop()
    else:
        if st.session_state.suggestions:
            # More suggestions are queued for the same person.
            st.session_state.suggestion = st.session_state.suggestions.pop(0)
        else:
            # Person finished: load the next one (this stops the script when
            # the queue is empty) together with its suggestions.
            get_unverified_person()
            get_suggestions(st.session_state.unverified_person.__dict__)
            st.session_state.suggestion = st.session_state.suggestions.pop(0)
        st.rerun()
|
|
|