working with all length
This commit is contained in:
parent
5ae6356c04
commit
fb0714fdae
42
stt_test.py
42
stt_test.py
@ -5,6 +5,8 @@ import numpy as np
|
|||||||
import wave
|
import wave
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import time
|
import time
|
||||||
|
from pydub import AudioSegment
|
||||||
|
import os
|
||||||
|
|
||||||
# Setup device
|
# Setup device
|
||||||
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
||||||
@ -69,13 +71,45 @@ def save_audio_to_wav(audio, filename):
|
|||||||
wf.setframerate(SAMPLE_RATE)
|
wf.setframerate(SAMPLE_RATE)
|
||||||
wf.writeframes(audio.tobytes())
|
wf.writeframes(audio.tobytes())
|
||||||
|
|
||||||
|
def get_audio_duration(filename):
|
||||||
|
"""Returns the duration of the audio file in seconds."""
|
||||||
|
audio = AudioSegment.from_wav(filename)
|
||||||
|
return len(audio) / 1000 # Convert milliseconds to seconds
|
||||||
|
|
||||||
|
def split_audio(filename, chunk_length_ms=30000):
|
||||||
|
"""Splits an audio file into chunks."""
|
||||||
|
audio = AudioSegment.from_wav(filename)
|
||||||
|
chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
def transcribe_audio():
|
def transcribe_audio():
|
||||||
"""Transcribes the audio file using Whisper."""
|
"""Transcribes the audio file using Whisper."""
|
||||||
print("Transcribing...")
|
print("Checking audio duration...")
|
||||||
result = pipe(FILENAME)
|
duration = get_audio_duration(FILENAME)
|
||||||
print("Transcription complete.")
|
|
||||||
return result["text"]
|
if duration > 30:
|
||||||
|
print(f"Audio is too long ({duration:.2f} seconds). Splitting into chunks...")
|
||||||
|
chunks = split_audio(FILENAME)
|
||||||
|
transcription = []
|
||||||
|
|
||||||
|
for i, chunk in enumerate(chunks):
|
||||||
|
chunk_filename = f"chunk_{i}.wav"
|
||||||
|
chunk.export(chunk_filename, format="wav")
|
||||||
|
print(f"Transcribing chunk {i + 1}/{len(chunks)}...")
|
||||||
|
result = pipe(chunk_filename)
|
||||||
|
transcription.append(result["text"])
|
||||||
|
|
||||||
|
# Clean up temporary chunk file
|
||||||
|
os.remove(chunk_filename)
|
||||||
|
|
||||||
|
print("Transcription complete.")
|
||||||
|
return " ".join(transcription)
|
||||||
|
else:
|
||||||
|
print(f"Audio is short enough ({duration:.2f} seconds). Transcribing directly...")
|
||||||
|
result = pipe(FILENAME)
|
||||||
|
print("Transcription complete.")
|
||||||
|
return result["text"]
|
||||||
|
|
||||||
|
|
||||||
# Gradio Interface
|
# Gradio Interface
|
||||||
|
Loading…
Reference in New Issue
Block a user