From dba250b7982dbeae55a3e75b41478556f53105ab Mon Sep 17 00:00:00 2001
From: Christian Rute <chris@serve2calc.com>
Date: Thu, 16 Jan 2025 17:00:39 +0100
Subject: [PATCH] Record audio of arbitrary length and stream partial transcriptions

---
 app.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/app.py b/app.py
index dafbff5..0d7f682 100644
--- a/app.py
+++ b/app.py
@@ -45,7 +45,17 @@ def start_recording():
     is_recording = True
     start_time = time.time()  # Record the start time
     print("Recording started...")
-    recorded_audio = sd.rec(int(SAMPLE_RATE * 60), samplerate=SAMPLE_RATE, channels=1, dtype=np.float32)
+    recorded_audio = []  # Start with an empty list to store audio chunks
+
+    # Start recording continuously
+    def callback(indata, frames, time, status):
+        if is_recording:  # Append audio data only while recording
+            recorded_audio.append(indata.copy())
+
+    stream = sd.InputStream(
+        samplerate=SAMPLE_RATE, channels=1, dtype=np.float32, callback=callback
+    )
+    stream.start()  # Start the stream
     return "Recording... Click 'Stop Recording' to finish."
 
 
@@ -54,12 +64,15 @@ def stop_recording():
     global is_recording, recorded_audio, start_time
     if not is_recording:
         return "Not recording!"
-    sd.stop()
     is_recording = False
     elapsed_time = time.time() - start_time  # Calculate elapsed time
     print(f"Recording stopped. Duration: {elapsed_time:.2f} seconds.")
-    save_audio_to_wav(recorded_audio[:int(SAMPLE_RATE * elapsed_time)], FILENAME)  # Truncate to actual duration
-    return "Recording stopped. Click 'Transcribe' to see the result."
+
+    # Combine all recorded chunks into a single array
+    audio_data = np.concatenate(recorded_audio, axis=0)
+    save_audio_to_wav(audio_data[: int(SAMPLE_RATE * elapsed_time)], FILENAME)
+    return f"Recording stopped. Duration: {elapsed_time:.2f} seconds. Click 'Transcribe' to see the result."
+
 
 
 def save_audio_to_wav(audio, filename):
@@ -99,19 +112,25 @@ def transcribe_audio():
             chunk_filename = f"chunk_{i}.wav"
             chunk.export(chunk_filename, format="wav")
             print(f"Transcribing chunk {i + 1}/{len(chunks)}...")
+
+            # Transcribe the chunk
             result = pipe(chunk_filename)
             transcription.append(result["text"])
 
             # Clean up temporary chunk file
             os.remove(chunk_filename)
 
+            # Stream intermediate transcription
+            yield f"{' '.join(transcription)}"
+
         print("Transcription complete.")
-        return " ".join(transcription)
+        yield f"{' '.join(transcription)}"
     else:
         print(f"Audio is short enough ({duration:.2f} seconds). Transcribing directly...")
         result = pipe(FILENAME)
         print("Transcription complete.")
-        return result["text"]
+        yield f"{result['text']}"
+
 
 
 # Gradio Interface