# ShortGenerator/subtitle_generator.py

import math
import os
import tempfile
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import moviepy as mp
from faster_whisper import WhisperModel
from pydub import AudioSegment
from pydub.silence import split_on_silence
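
# Dependency assumptions (not pinned here):
# - `import moviepy as mp` follows the moviepy 2.x layout, where VideoFileClip is
#   available at the package top level; moviepy 1.x exposes it via `moviepy.editor`.
# - faster-whisper downloads the requested Whisper model on first use.
# - Audio extraction needs an ffmpeg binary reachable by moviepy (usually provided
#   by the imageio-ffmpeg package).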


def format_time(seconds):
    """Convert a time in seconds to an SRT timestamp (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    millis = int((seconds - int(seconds)) * 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def wrap_text(text, max_len=40):
    """
    Wraps text to ~max_len characters per line without cutting words.
    """
    words = text.split()
    lines = []
    current_line = ""
    for word in words:
        if len(current_line + " " + word) <= max_len:
            current_line += (" " if current_line else "") + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)


def write_srt(subtitles, output_path):
    """Write a list of {'start', 'end', 'text'} dicts to an SRT file."""
    with open(output_path, 'w', encoding='utf-8') as f:
        for i, sub in enumerate(subtitles, 1):
            f.write(f"{i}\n")
            f.write(f"{format_time(sub['start'])} --> {format_time(sub['end'])}\n")
            f.write(f"{wrap_text(sub['text'])}\n\n")


def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, model_size="base", language="auto"):
    try:
        print("📽️ Loading video file...")
        video = mp.VideoFileClip(video_path)
        audio = video.audio
        # Reserve a temp .wav path; the file handle is closed immediately so
        # moviepy can write to it (important on Windows).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name
        print("🔊 Extracting audio...")
        audio.write_audiofile(temp_audio_path, logger=None)
        print(f"🤖 Loading Whisper model ({model_size})...")
        # Initialize the Whisper model - much more accurate than Google Speech Recognition
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        print("🎯 Transcribing with Whisper AI...")
        # Transcribe the entire audio file at once - Whisper handles timing automatically
        segments, info = model.transcribe(
            temp_audio_path,
            language=None if language == "auto" else language,
            word_timestamps=True,
            vad_filter=True,  # Voice Activity Detection for better accuracy
            vad_parameters=dict(min_silence_duration_ms=500)
        )
        print(f"🌍 Detected language: {info.language} (confidence: {info.language_probability:.2f})")
        subtitles = []
        segment_list = list(segments)  # Convert the generator to a list for progress tracking
        print(f"📝 Processing {len(segment_list)} speech segments...")
        for i, segment in enumerate(segment_list):
            # Whisper provides precise timing and text
            start_time = segment.start
            end_time = segment.end
            text = segment.text.strip()
            if text:
                subtitles.append({
                    "start": start_time,
                    "end": end_time,
                    "text": text
                })
                print(f"✅ Segment {i+1}: '{text[:50]}...' ({start_time:.1f}s - {end_time:.1f}s)")
            # Update the progress bar
            if progress_callback:
                progress_callback(i + 1, len(segment_list))
        # Clean up
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        if video:
            video.close()
        if audio:
            audio.close()
        print(f"🎯 Generated {len(subtitles)} subtitle segments with Whisper AI")
        write_srt(subtitles, srt_output_path)
        return True
    except Exception as e:
        print(f"❌ Error: {e}")
        return False
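
# Illustrative headless use (hypothetical file names), skipping the GUI below:
#   transcribe_video_to_srt("clip.mp4", "clip.srt", model_size="base", language="en")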


# -------------------- GUI --------------------
def select_file_and_generate():
    video_path = filedialog.askopenfilename(
        title="Select a video file",
        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.webm *.flv")]
    )
    if not video_path:
        return
    srt_output_path = filedialog.asksaveasfilename(
        title="Save SRT subtitles as...",
        defaultextension=".srt",
        filetypes=[("Subtitle files", "*.srt")]
    )
    if not srt_output_path:
        return
    # Disable the button during processing
    select_button.config(state="disabled", text="Processing...")
    progress_bar["value"] = 0
    progress_label.config(text="Starting speech recognition...")
    status_label.config(text="🔄 Processing video...", fg="blue")
    root.update()

    def update_progress(current, total):
        # Note: this runs on the worker thread. Tkinter is not strictly
        # thread-safe, so a more robust version would schedule these
        # widget updates via root.after().
        percent = (current / total) * 100
        progress_bar["value"] = percent
        progress_label.config(text=f"Processing: {current}/{total} segments")
        root.update()

    def process_video():
        try:
            model_size = model_size_var.get()
            language = language_var.get()
            success = transcribe_video_to_srt(
                video_path,
                srt_output_path,
                progress_callback=update_progress,
                model_size=model_size,
                language=language
            )
            if success:
                status_label.config(text="✅ Subtitles generated successfully!", fg="green")
                messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}\n\nOpen it with subtitle_generator2.py for editing!")
            else:
                status_label.config(text="❌ Error occurred during processing", fg="red")
                messagebox.showerror("Error", "Something went wrong. Check the console for details.")
        except Exception as e:
            status_label.config(text="❌ Unexpected error occurred", fg="red")
            messagebox.showerror("Error", f"Unexpected error: {e}")
        finally:
            # Re-enable the button
            select_button.config(state="normal", text="📂 Select Video and Generate Subtitles")
            progress_label.config(text="Done")

    # Run in a separate thread so the GUI does not freeze
    thread = threading.Thread(target=process_video)
    thread.daemon = True
    thread.start()


# GUI Setup
root = tk.Tk()
root.title("🎬 Auto Subtitle Generator - Speech to SRT")
root.geometry("500x350")

frame = tk.Frame(root, padx=20, pady=20)
frame.pack(fill="both", expand=True)

# Title
title_label = tk.Label(frame, text="🎬 Auto Subtitle Generator", font=("Arial", 16, "bold"))
title_label.pack(pady=(0, 10))
subtitle_label = tk.Label(frame, text="Extract speech from video and create perfectly timed SRT subtitles", font=("Arial", 10))
subtitle_label.pack(pady=(0, 20))

# Settings Frame
settings_frame = tk.LabelFrame(frame, text="Whisper AI Settings", padx=10, pady=10)
settings_frame.pack(fill="x", pady=(0, 15))

# Model Size Selection
model_frame = tk.Frame(settings_frame)
model_frame.pack(fill="x", pady=(0, 10))
tk.Label(model_frame, text="🧠 Model Size:", font=("Arial", 9)).pack(side="left")
model_size_var = tk.StringVar(value="base")
model_dropdown = ttk.Combobox(
    model_frame,
    textvariable=model_size_var,
    values=["tiny", "base", "small", "medium", "large"],
    state="readonly",
    width=12
)
model_dropdown.pack(side="right")

# Language Selection
language_frame = tk.Frame(settings_frame)
language_frame.pack(fill="x", pady=(0, 10))
tk.Label(language_frame, text="🌍 Language:", font=("Arial", 9)).pack(side="left")
language_var = tk.StringVar(value="auto")
language_dropdown = ttk.Combobox(
    language_frame,
    textvariable=language_var,
    values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
    state="readonly",
    width=12
)
language_dropdown.pack(side="right")

# Help text
help_label = tk.Label(
    settings_frame,
    text=" 💡 Base model recommended for the best speed/accuracy balance\n 🔍 Auto language detection works for most videos",
    font=("Arial", 8),
    fg="gray"
)
help_label.pack(anchor="w")

# Main Action Button
select_button = tk.Button(
    frame,
    text="📂 Select Video and Generate Subtitles",
    command=select_file_and_generate,
    font=("Arial", 11, "bold"),
    bg="#4CAF50",
    fg="white",
    pady=8
)
select_button.pack(pady=15, fill="x")

# Progress Section
progress_frame = tk.LabelFrame(frame, text="Progress", padx=10, pady=10)
progress_frame.pack(fill="x", pady=(0, 10))
progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
progress_bar.pack(fill="x", pady=(0, 5))
progress_label = tk.Label(progress_frame, text="Ready to process video", font=("Arial", 9))
progress_label.pack()

# Status Label
status_label = tk.Label(frame, text="💡 Tip: Use generated SRT files with subtitle_generator2.py for advanced editing!", font=("Arial", 9), fg="blue")
status_label.pack(pady=(10, 0))

root.mainloop()