"""Streamlit app that steps through person names found on one PDF page.

For each name, the page is rendered to a PNG with every occurrence of the
name highlighted, and the image is shown next to a "Next" button.
"""

import re

import fitz
import streamlit as st
from fitz import Page, Document

from _llm import LLM
from person_identifier import PersonFinder
from print_color import *


def set_name() -> None:
    """Button callback: move ``st.session_state.name`` to the next queued name.

    Streamlit runs ``on_click`` callbacks before the script reruns. The script
    resets ``st.session_state.names`` to ``None`` once the queue is exhausted,
    so the callback must tolerate a missing/empty queue instead of raising on
    ``None.pop(0)`` / ``[].pop(0)``. In that case the current name is kept and
    the rerun refills the queue.
    """
    pending = st.session_state.names
    if pending:
        st.session_state.name = pending.pop(0)


def highlight_name_in_pdf(page: Page, name: str) -> None:
    """Highlight every occurrence of ``name`` on ``page`` and save it as a PNG.

    The rendered image is written to the module-level ``image_filename``.
    """
    # Locate all occurrences of the name on the page (as quads).
    rectangles = page.search_for(name, quads=True)
    # Mark the hits with a highlight annotation.
    page.add_highlight_annot(rectangles)
    # Render the annotated page at 300 dpi.
    pixmap = page.get_pixmap(dpi=300)
    # Persist the rendering so Streamlit can display it.
    pixmap.save(image_filename, 'png')


def show_image(filename) -> None:
    """Display the image file ``filename`` in the Streamlit app."""
    st.image(filename)


def get_page(page_number: int) -> Page:
    """Return a cropped copy of page ``page_number`` from the module-level ``doc``.

    The page is copied into a fresh, empty document so that annotations added
    later do not touch the document kept in the session state.
    """
    new_doc = fitz.open()
    new_doc.insert_pdf(doc, from_page=page_number, to_page=page_number)
    page = new_doc[0]
    # Restrict the visible area of the page to a fixed rectangle.
    page.set_cropbox(fitz.Rect(0, 100, 520, 800))
    return page


@st.cache_resource()
def get_extractor():
    """Return a ``PersonFinder`` cached by Streamlit as a shared resource.

    Not called by the script flow below.
    """
    return PersonFinder()


st.set_page_config(layout="wide")

filename = "Förhörsprotokoll.pdf"
image_filename = "highlighted.png"
page_number = 89

# Open the PDF once per session and reuse it across reruns.
if 'doc' not in st.session_state:
    st.session_state.doc = fitz.open(filename)
doc = st.session_state.doc
page = get_page(page_number)

if 'all_names' not in st.session_state:
    st.session_state.all_names = {}

# (Re)fill the queue of names when it is missing or was reset to None.
if 'names' not in st.session_state or st.session_state.names is None:
    person_extractor = PersonFinder(st.session_state.all_names)
    # NOTE(review): the extractor instance is passed explicitly as the first
    # argument; confirm this matches PersonFinder.extract_names' signature.
    st.session_state.names = person_extractor.extract_names(person_extractor, page.get_text())
    st.session_state.all_names = person_extractor.names
    print_blue(st.session_state.names)
    print_purple(st.session_state.all_names)
names = st.session_state.names

if 'name' not in st.session_state:
    if not names:
        # Nothing to show: avoid IndexError from popping an empty queue.
        st.warning("No names found on this page.")
        st.stop()
    st.session_state.name = names.pop(0)
name = st.session_state.name

st.markdown(f'#### {name}')
highlight_name_in_pdf(page, name)

col1, col2 = st.columns([5,2])
with col1:
    show_image(image_filename)
with col2:
    next_clicked = st.button("Next", on_click=set_name)
    if next_clicked:
        if not names:
            # Queue exhausted: mark it for re-extraction on the next rerun.
            st.session_state.names = None
        else:
            # NOTE(review): the image was already rendered above for this same
            # name; this second call looks redundant - confirm before removing.
            highlight_name_in_pdf(page, st.session_state.name)