- Esta versão fornece legendas mais bem sincronizadas com a fala.
- A qualidade do reconhecimento da fala está ligada à qualidade do modelo de idioma usado.
import os
import subprocess
import vosk
import pysrt
import json
from flask import Flask, render_template, request
from werkzeug.utils import secure_filename
from moviepy.editor import VideoFileClip
# Directory that receives uploaded videos (and the generated subtitle files).
UPLOAD_FOLDER = 'uploads'

# Flask application object; the upload directory is published through its config.
app = Flask(__name__)
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
def segundos_para_subrip_time(segundos):
    """Convert an offset in seconds into a ``pysrt.SubRipTime``.

    The value is rounded to whole milliseconds *before* it is split into
    components. Splitting a float first (e.g. ``61.29 % 60``) leaves a
    remainder slightly below the intended value, which can surface as a
    timestamp that is one millisecond too small.

    Args:
        segundos: Offset from the start of the media, in seconds
            (``int`` or ``float``).

    Returns:
        A ``pysrt.SubRipTime`` built from integer hour, minute, second and
        millisecond components.
    """
    total_ms = int(round(segundos * 1000))
    total_segundos, milissegundos = divmod(total_ms, 1000)
    total_minutos, segs = divmod(total_segundos, 60)
    horas, minutos = divmod(total_minutos, 60)
    return pysrt.SubRipTime(
        hours=horas, minutes=minutos, seconds=segs, milliseconds=milissegundos
    )
def recognize_speech(file_path, language):
    """Transcribe the audio track of a video into a list of subtitle items.

    ffmpeg decodes the file to 16 kHz, mono, 16-bit PCM on a pipe. The audio
    is fed to a Vosk recognizer one second at a time, and the recognized
    words are grouped into ``pysrt.SubRipItem`` objects. An open subtitle is
    closed as soon as it holds at least 9 words, or when a chunk yields no
    recognized words.

    Args:
        file_path: Path to the video file to transcribe.
        language: Language code selecting the Vosk model directory:
            ``"en"``, ``"pt"`` or ``"es"``.

    Returns:
        A list of ``pysrt.SubRipItem`` in the order they were recognized,
        indexed from 1.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
        FileNotFoundError: If ``file_path`` is not an existing file (also
            raised by ``subprocess.Popen`` when the ``ffmpeg`` executable
            cannot be found).
    """
    model_paths = {
        "en": "vosk-model-small-en-us-0.15",
        # For better Portuguese recognition, the larger model
        # "vosk-model-pt-fb-v0.1.1-20220516_2113" can be used instead.
        "pt": "vosk-model-small-pt-0.3",
        "es": "vosk-model-small-es-0.42",
    }
    if language not in model_paths:
        raise ValueError("Idioma não suportado.")
    if not os.path.isfile(file_path):
        raise FileNotFoundError("O arquivo de vídeo não existe.")

    sample_rate = 16000
    # 16-bit mono PCM: two bytes per sample, so this is one second of audio.
    chunk_size = sample_rate * 2
    # A subtitle is closed once it reaches this many words.
    max_words = 9

    rec = vosk.KaldiRecognizer(vosk.Model(model_paths[language]), sample_rate)
    rec.SetWords(True)

    # Argument list (no shell): paths containing spaces or shell
    # metacharacters are passed to ffmpeg verbatim. Raw PCM ("s16le") is
    # requested instead of a WAV container so that no header bytes are fed
    # to the recognizer as if they were audio.
    ffmpeg_command = [
        "ffmpeg", "-y", "-i", file_path,
        "-vn", "-acodec", "pcm_s16le",
        "-ar", str(sample_rate), "-ac", "1",
        "-f", "s16le", "-",
    ]

    subtitles = []
    current_subtitle = None

    # The context manager closes the pipe and waits for ffmpeg even when the
    # loop body raises, so no child process is left behind.
    with subprocess.Popen(
        ffmpeg_command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    ) as ffmpeg_process:
        read_chunk = ffmpeg_process.stdout.read
        for audio_data in iter(lambda: read_chunk(chunk_size), b""):
            # NOTE(review): a result is requested after every chunk instead
            # of only when AcceptWaveform() reports an utterance boundary;
            # presumably deliberate to keep timings tight - confirm before
            # changing.
            rec.AcceptWaveform(audio_data)
            words = json.loads(rec.Result()).get("result")

            if not words:
                # Nothing recognized in this chunk: treat it as a pause and
                # close the subtitle being built, then keep processing.
                if current_subtitle:
                    subtitles.append(current_subtitle)
                    current_subtitle = None
                continue

            end_time = segundos_para_subrip_time(words[-1]["end"])
            transcript = " ".join(word["word"] for word in words)

            if current_subtitle:
                # Extend the open subtitle with the newly recognized words.
                current_subtitle.text += " " + transcript
                current_subtitle.end = end_time
            else:
                # Every previously started subtitle has already been
                # appended, so the next index is len(subtitles) + 1.
                current_subtitle = pysrt.SubRipItem(
                    index=len(subtitles) + 1,
                    start=segundos_para_subrip_time(words[0]["start"]),
                    end=end_time,
                    text=transcript,
                )

            # Close the subtitle once it holds at least max_words words.
            if len(current_subtitle.text.split()) >= max_words:
                subtitles.append(current_subtitle)
                current_subtitle = None

    # Keep whatever was still open when the audio ended.
    if current_subtitle:
        subtitles.append(current_subtitle)
    return subtitles
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the upload form and, on POST, generate subtitles for a video.

    GET renders ``index.html`` without a message. POST reads the uploaded
    file from the ``video_file`` field and the language code from the
    ``language_choice`` field, saves the upload under the configured upload
    folder, runs ``recognize_speech`` on it and writes the subtitles next to
    the saved video as ``<name>_ing.srt``, ``<name>_por.srt`` or
    ``<name>_esp.srt``. The page is then rendered with a status message.

    Returns:
        The rendered ``index.html`` template.
    """
    if request.method != "POST":
        return render_template("index.html")

    upload = request.files["video_file"]
    language = request.form["language_choice"]

    # Extension check is case-insensitive so "clip.MP4" is accepted too.
    if not (upload and upload.filename.lower().endswith(".mp4")):
        return render_template(
            "index.html",
            message="Selecione um arquivo de vídeo no formato .mp4.",
        )

    upload_dir = app.config['UPLOAD_FOLDER']
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, secure_filename(upload.filename))
    upload.save(file_path)

    # Output-file suffix for each supported language code.
    suffixes = {"en": "_ing.srt", "pt": "_por.srt", "es": "_esp.srt"}

    try:
        subtitles = recognize_speech(file_path, language)
        output_file = os.path.splitext(file_path)[0] + suffixes[language]
        with open(output_file, "w", encoding="utf-8") as f:
            for subtitle in subtitles:
                # One extra newline after each item, as in the original output.
                f.write(str(subtitle))
                f.write("\n")
        message = f"Processo concluído. Legenda gerada em {output_file}"
    except Exception as e:
        # Request boundary: report any failure to the user on the page
        # instead of letting it propagate.
        message = f"Erro durante o reconhecimento: {e}"

    return render_template("index.html", message=message)
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; keep it for
    # local development only and disable it before exposing the app.
    app.run(debug=True)
Nenhum comentário:
Postar um comentário