Transcription now works with audio of any length

2025-01-06 18:20:43 +01:00 · 2025-01-06 18:20:43 +01:00 · fb0714fdae
commit fb0714fdae
parent 5ae6356c04
1 changed files with 38 additions and 4 deletions
--- a/stt_test.py
+++ b/stt_test.py
@@ -5,6 +5,8 @@ import numpy as np
 import wave
 import gradio as gr
 import time
+from pydub import AudioSegment
+import os

 # Setup device
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -69,13 +71,45 @@ def save_audio_to_wav(audio, filename):
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(audio.tobytes())

+def get_audio_duration(filename):
+    """Return the length, in seconds, of the WAV file at *filename*."""
+    length_ms = len(AudioSegment.from_wav(filename))  # len() of a segment is in milliseconds
+    return length_ms / 1000
+
+def split_audio(filename, chunk_length_ms=30000):
+    """Cut the WAV file at *filename* into consecutive pieces of at most *chunk_length_ms* each."""
+    segment = AudioSegment.from_wav(filename)
+    starts = range(0, len(segment), chunk_length_ms)  # start offset of every piece
+    return [segment[start:start + chunk_length_ms] for start in starts]
+

 def transcribe_audio():
-    """Transcribes the audio file using Whisper."""
-    print("Transcribing...")
-    result = pipe(FILENAME)
-    print("Transcription complete.")
-    return result["text"]
+    """Transcribe the recording stored at FILENAME and return its text.
+
+    Audio of at most 30 seconds is handed to ``pipe`` in a single call.
+    Longer audio is split with ``split_audio``; each chunk is exported to a
+    WAV file inside a private temporary directory, transcribed on its own,
+    and the chunk texts are joined with single spaces.
+
+    Returns:
+        The transcription as one string.
+    """
+    # Function-scope import: keeps this block self-contained without
+    # touching the file-level import list.
+    import tempfile
+
+    print("Checking audio duration...")
+    duration = get_audio_duration(FILENAME)
+
+    if duration <= 30:
+        print(f"Audio is short enough ({duration:.2f} seconds). Transcribing directly...")
+        result = pipe(FILENAME)
+        print("Transcription complete.")
+        return result["text"]
+
+    print(f"Audio is too long ({duration:.2f} seconds). Splitting into chunks...")
+    chunks = split_audio(FILENAME)
+    transcription = []
+
+    # A fresh temporary directory avoids clobbering any chunk_N.wav already in
+    # the working directory, and it is removed even if export() or pipe() raises.
+    with tempfile.TemporaryDirectory() as chunk_dir:
+        for i, chunk in enumerate(chunks):
+            chunk_filename = os.path.join(chunk_dir, f"chunk_{i}.wav")
+            # export() hands back the open output file; close it explicitly so
+            # the data is on disk and the handle is released before pipe() reads it.
+            chunk.export(chunk_filename, format="wav").close()
+            print(f"Transcribing chunk {i + 1}/{len(chunks)}...")
+            result = pipe(chunk_filename)
+            transcription.append(result["text"])
+
+    print("Transcription complete.")
+    return " ".join(transcription)


 # Gradio Interface