From dba250b7982dbeae55a3e75b41478556f53105ab Mon Sep 17 00:00:00 2001 From: Christian Rute Date: Thu, 16 Jan 2025 17:00:39 +0100 Subject: [PATCH] add support for dynamic speech length --- app.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index dafbff5..0d7f682 100644 --- a/app.py +++ b/app.py @@ -45,7 +45,17 @@ def start_recording(): is_recording = True start_time = time.time() # Record the start time print("Recording started...") - recorded_audio = sd.rec(int(SAMPLE_RATE * 60), samplerate=SAMPLE_RATE, channels=1, dtype=np.float32) + recorded_audio = [] # Start with an empty list to store audio chunks + + # Start recording continuously + def callback(indata, frames, time, status): + if is_recording: # Append audio data only while recording + recorded_audio.append(indata.copy()) + + stream = sd.InputStream( + samplerate=SAMPLE_RATE, channels=1, dtype=np.float32, callback=callback + ) + stream.start() # Start the stream return "Recording... Click 'Stop Recording' to finish." @@ -54,12 +64,15 @@ def stop_recording(): global is_recording, recorded_audio, start_time if not is_recording: return "Not recording!" - sd.stop() is_recording = False elapsed_time = time.time() - start_time # Calculate elapsed time print(f"Recording stopped. Duration: {elapsed_time:.2f} seconds.") - save_audio_to_wav(recorded_audio[:int(SAMPLE_RATE * elapsed_time)], FILENAME) # Truncate to actual duration - return "Recording stopped. Click 'Transcribe' to see the result." + + # Combine all recorded chunks into a single array + audio_data = np.concatenate(recorded_audio, axis=0) + save_audio_to_wav(audio_data[: int(SAMPLE_RATE * elapsed_time)], FILENAME) + return f"Recording stopped. Duration: {elapsed_time:.2f} seconds. Click 'Transcribe' to see the result." + def save_audio_to_wav(audio, filename): @@ -99,19 +112,25 @@ def transcribe_audio(): chunk_filename = f"chunk_{i}.wav" chunk.export(chunk_filename, format="wav") print(f"Transcribing chunk {i + 1}/{len(chunks)}...") + + # Transcribe the chunk result = pipe(chunk_filename) transcription.append(result["text"]) # Clean up temporary chunk file os.remove(chunk_filename) + # Stream intermediate transcription + yield f"{' '.join(transcription)}" + print("Transcription complete.") - return " ".join(transcription) + yield f"{' '.join(transcription)}" else: print(f"Audio is short enough ({duration:.2f} seconds). Transcribing directly...") result = pipe(FILENAME) print("Transcription complete.") - return result["text"] + yield f"{result['text']}" + # Gradio Interface