# sci/transcribe_audio.py

import io
import os
import requests
from pydub import AudioSegment
import streamlit as st

def streamlit_audio(uploaded_file) -> None:
    """Convert an uploaded audio file to mono MP3, transcribe it, and show the result.

    The upload is decoded with pydub, down-mixed to one channel, re-encoded as
    a 64 kbit/s MP3 in memory and sent to ``transcribe``. On success the
    ``"transcription"`` field of the JSON response is shown in a text area and
    offered for download as ``<original name>.vtt``.

    Args:
        uploaded_file: Object exposing ``.name`` and ``.getvalue()`` (as used
            below), or ``None`` when nothing has been uploaded yet, in which
            case the function does nothing.

    Returns:
        None. All output is rendered through Streamlit widgets. Unsupported or
        undecodable files end the script run via ``st.stop()``; network and
        service failures are reported with ``st.error``.
    """
    if uploaded_file is None:
        return

    # Function-scope import: pydub is already a dependency of this module, and
    # importing here keeps this block self-contained.
    from pydub.exceptions import CouldntDecodeError

    filename = uploaded_file.name
    file_extension = os.path.splitext(filename)[1].lower()

    progress_bar = st.progress(0)
    status_text = st.empty()

    if file_extension not in ('.m4a', '.mp3', '.wav', '.flac'):
        st.error("Unsupported file type")
        st.stop()
        return  # st.stop() halts the script run; kept so the flow is explicit

    # Decode the upload, convert to mono and re-encode as MP3 entirely in memory.
    try:
        audio = AudioSegment.from_file(
            io.BytesIO(uploaded_file.getvalue()),
            format=file_extension[1:],  # extension without the leading dot
        )
    except CouldntDecodeError:
        st.error("The audio file could not be decoded.")
        st.stop()
        return
    audio = audio.set_channels(1)  # Convert to mono
    file_buffer = io.BytesIO()
    audio.export(file_buffer, format="mp3", bitrate="64k")
    file_buffer.seek(0)
    progress_bar.progress(50)
    status_text.text("Audio file converted.")

    # Send the converted audio data to the transcription service.
    try:
        response = transcribe(file_buffer, filename)
    except requests.exceptions.Timeout:
        st.error("The request timed out. Please try again later.")
        return
    except requests.exceptions.RequestException:
        # Connection refused, DNS failure, broken transfer, etc.
        st.error("Could not reach the transcription service. Please try again later.")
        return

    if response.status_code != 200:
        st.error("Failed to upload and process the file.")
        return

    # Parse the body only after the status check: error responses are not
    # guaranteed to contain JSON.
    try:
        response_json = response.json()
    except ValueError:
        response_json = None
    if not isinstance(response_json, dict):
        st.error("The transcription service returned an invalid response.")
        return

    progress_bar.progress(100)
    status_text.text("File uploaded and processed.")

    transcription_content = response_json.get("transcription", "")
    st.subheader("Transcription")
    st.text_area("Transcription Content", transcription_content, height=300)
    transcription_filename = os.path.splitext(filename)[0] + '.vtt'
    st.download_button(
        label="Download Transcription",
        data=transcription_content,
        file_name=transcription_filename,
        mime='text/vtt',
    )

def transcribe(file_buffer, filename, *, url="http://98.128.172.165:4001/upload", timeout=3600):
    """Upload an audio buffer to the transcription service and return the response.

    Args:
        file_buffer: File-like object holding the audio data; it is sent as the
            multipart field ``file`` with content type ``audio/mp3``.
        filename: File name reported to the service for the uploaded part.
        url: Endpoint that receives the POST request. Defaults to the
            previously hard-coded service address.
        timeout: Timeout in seconds passed to ``requests.post``. Defaults to
            3600 (one hour) -- presumably because long recordings take a while
            to transcribe; confirm against the service.

    Returns:
        The ``requests.Response`` from the service. The status code is not
        checked here; callers must inspect it.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.post`` (includes ``Timeout``).
    """
    # Prepare the files dictionary for the multipart POST request.
    files = {'file': (filename, file_buffer, 'audio/mp3')}
    return requests.post(url, files=files, timeout=timeout)