# hindenburg/streamlit_transcribe.py

import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))

import streamlit as st
import xml.etree.ElementTree as ET
from hindenburg_api.transcription import Transcription, transcribe
from hindenburg_api.project import Project

# Paths are selected in the UI; the former hardcoded defaults are kept below for reference only.
# PROJECT_PATH = "tests/demo_project/demo.nhsx"
# AUDIOPOOL_PATH = "tests/demo_project/"

# The get_audio_files_from_project function was removed from this file; that logic now lives in the Project class.

def _resolve_audio_pool(project_path, project_dir):
    """Read the AudioPool element of a project file and look for its directory.

    Args:
        project_path: Path of the project XML file to parse.
        project_dir: Directory used as the base for relative lookups.

    Returns:
        A ``(audio_pool_path, pool_path, pool_location)`` tuple.
        ``pool_path`` and ``pool_location`` are the ``Path`` and ``Location``
        attributes of the ``AudioPool`` element (``""`` and ``None`` when the
        element is missing). ``audio_pool_path`` is the chosen directory, or
        ``""`` when no candidate was found. It is not guaranteed to exist:
        when ``Location`` exists on disk, ``Location/Path`` is returned
        unchecked and the caller is expected to verify it.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(project_path).getroot()
    audio_pool_elem = root.find("AudioPool")
    if audio_pool_elem is None:
        return "", "", None

    pool_path = audio_pool_elem.get("Path", "")
    pool_location = audio_pool_elem.get("Location", "")
    if pool_location and os.path.exists(pool_location):
        return os.path.join(pool_location, pool_path), pool_path, pool_location

    # The recorded location is not available on this machine, so try a few
    # fallbacks in order of preference.
    candidates = (
        os.path.join(project_dir, pool_path),  # Look in same dir as project
        pool_location,  # Use location directly
        os.path.join(project_dir, os.path.basename(pool_location)),  # Use basename
    )
    found = next((c for c in candidates if c and os.path.exists(c)), "")
    return found, pool_path, pool_location


def _find_audio_file(file_name, audio_pool_path, pool_path, pool_location):
    """Return the first existing candidate path for ``file_name``, or ``None``.

    Args:
        file_name: Name of the audio file as listed by the project.
        audio_pool_path: Directory selected as the audio pool.
        pool_path: ``Path`` attribute of the project's ``AudioPool`` element
            (may be empty).
        pool_location: ``Location`` attribute of the ``AudioPool`` element, or
            ``None`` when the project has no such element.
    """
    candidates = [
        os.path.join(audio_pool_path, file_name),
        os.path.join(audio_pool_path, "demo Files", file_name),
        os.path.join(audio_pool_path, "..", "demo Files", file_name),
    ]
    if pool_location is not None:
        candidates.append(os.path.join(pool_location, file_name))
    if pool_path:
        # Same two layouts as the hard-coded "demo Files" entries above, but
        # using the folder name read from the project's AudioPool element so
        # projects other than the demo can be resolved too.
        candidates.append(os.path.join(audio_pool_path, pool_path, file_name))
        candidates.append(os.path.join(audio_pool_path, "..", pool_path, file_name))

    return next((c for c in candidates if os.path.exists(c)), None)


def main():
    """Run the Streamlit UI for transcribing audio files of an uploaded project.

    Flow: upload a ``.nhsx`` project file, save it to a local temp file, locate
    the audio directory (automatically or via a text input), let the user pick
    files and speaker limits, transcribe each selected file, store the result
    in the project, and offer the updated project file for download.
    """
    st.title("Hindenburg Project Transcription Tool")

    # File uploader for project file
    uploaded_file = st.file_uploader("Choose a Hindenburg project file (.nhsx)", type="nhsx")

    if uploaded_file is None:
        st.info("Please upload a project file to continue")
        return

    # Save the uploaded file to a temporary location. Only the base name of
    # the uploaded file is used so the name cannot point outside the current
    # directory.
    # NOTE(review): this write happens every time main() runs, so a later
    # rerun replaces a previously saved project file with the original
    # upload -- confirm whether that is intended.
    safe_name = os.path.basename(uploaded_file.name)
    temp_project_path = f"temp_{safe_name}"
    with open(temp_project_path, "wb") as out:
        out.write(uploaded_file.getbuffer())

    st.success(f"Project file loaded: {uploaded_file.name}")

    # Get the project directory (parent directory of the project file)
    project_dir = os.path.dirname(temp_project_path)

    # Load the project
    project = Project(temp_project_path)
    project.load_project()

    # Get the audio pool path from the project XML
    try:
        audio_pool_path, pool_path, pool_location = _resolve_audio_pool(
            temp_project_path, project_dir
        )
    except ET.ParseError as exc:
        st.error(f"Could not parse project file: {exc}")
        return

    if not audio_pool_path or not os.path.exists(audio_pool_path):
        # Allow user to select the audio files directory
        st.warning("Could not automatically locate audio files directory.")
        audio_dir = st.text_input("Enter the path to your audio files directory:")
        if audio_dir and os.path.exists(audio_dir):
            audio_pool_path = audio_dir
        else:
            st.error("Please provide a valid audio files directory path.")
            return

    st.success(f"Audio files directory found: {audio_pool_path}")

    # Get audio files from the project
    audio_files = project.get_audio_files()

    if not audio_files:
        st.warning("No audio files found in the project")
        return

    st.write("Select files to transcribe:")

    # Create a container for the file list
    file_container = st.container()

    selected = []
    with file_container:
        for audio_file in audio_files:
            file_id = audio_file["id"]

            # Add a unique key for each file's state
            file_key = f"file_{file_id}"
            if file_key not in st.session_state:
                st.session_state[file_key] = {
                    "selected": False,
                    "min_speakers": 2,
                    "max_speakers": 2
                }

            # Display file info
            col1, col2, col3, col4 = st.columns([3, 2, 2, 1])
            with col1:
                # Use display_name if available, otherwise fall back to name
                display_name = audio_file.get("display_name", audio_file["name"])
                st.write(display_name)
            with col2:
                st.write(audio_file["duration"])
            with col3:
                st.write("Yes" if audio_file["has_transcription"] else "No")
            with col4:
                # Use a label to avoid accessibility warnings
                checked = st.checkbox("Select", key=f"chk_{file_id}", value=False,
                                      disabled=audio_file["has_transcription"],
                                      label_visibility="collapsed")

                if checked and not audio_file["has_transcription"]:
                    # Expand settings for selected files
                    with st.expander(f"Settings for {display_name}", expanded=False):
                        col_min, col_max = st.columns(2)
                        with col_min:
                            min_speakers = st.number_input("Min Speakers",
                                                           min_value=1,
                                                           max_value=5,
                                                           value=2,
                                                           key=f"min_{file_id}")
                        with col_max:
                            # The default must never be below min_value, so
                            # follow the chosen minimum once it exceeds 2.
                            max_speakers = st.number_input("Max Speakers",
                                                           min_value=min_speakers,
                                                           max_value=5,
                                                           value=max(2, min_speakers),
                                                           key=f"max_{file_id}")

                    # Add to selected files with speaker settings
                    file_with_settings = audio_file.copy()
                    file_with_settings["min_speakers"] = min_speakers
                    file_with_settings["max_speakers"] = max_speakers
                    selected.append(file_with_settings)

    if st.button("Transcribe Selected"):
        if not selected:
            st.warning("Please select at least one file to transcribe")
            return

        progress_bar = st.progress(0)
        status_text = st.empty()
        transcription_successful = False

        for index, audio_file in enumerate(selected):
            file_name = audio_file["name"]
            status_text.write(f"Transcribing {file_name}...")

            # Try different places to find the audio file
            audio_path = _find_audio_file(file_name, audio_pool_path, pool_path, pool_location)

            if audio_path is None:
                st.error(f"Audio file not found: {file_name}")
            else:
                # Speaker limits chosen for this file in the settings expander
                min_speakers = audio_file["min_speakers"]
                max_speakers = audio_file["max_speakers"]

                st.info(f"Using {min_speakers} min and {max_speakers} max speakers for {file_name}")

                try:
                    segments = transcribe(audio_path, min_speakers=min_speakers, max_speakers=max_speakers)
                    transcription = Transcription()
                    transcription.add_segments(segments)
                    xml_str = transcription.to_xml()
                    project.add_transcription(audio_file["id"], xml_str)
                    project.save_project()
                    st.success(f"Transcribed and saved: {file_name}")
                    transcription_successful = True
                except Exception as e:
                    # Keep going with the remaining files; report the failure in the UI.
                    st.error(f"Error transcribing {file_name}: {e}")

            # Update progress for every file, including skipped or failed
            # ones, so the bar reaches 100% when the loop ends.
            progress_bar.progress((index + 1) / len(selected))

        status_text.write("Transcription complete!")

        # Offer download if transcription was successful
        if transcription_successful:
            with open(temp_project_path, "rb") as file:
                st.download_button(
                    label="Download transcribed project",
                    data=file,
                    file_name=uploaded_file.name,
                    mime="application/xml"
                )

        # Don't delete the temp file yet as the user might want to download it
        # We could add a cleanup button or do it on session end

# Run the app when this file is executed as a script rather than imported.
if __name__ == "__main__":
    main()