262 lines
8.9 KiB
Python
262 lines
8.9 KiB
Python
import os
|
|
import math
|
|
import tempfile
|
|
import moviepy as mp
|
|
import tkinter as tk
|
|
from tkinter import filedialog, messagebox, ttk
|
|
from pydub import AudioSegment
|
|
from pydub.silence import split_on_silence
|
|
import threading
|
|
from faster_whisper import WhisperModel
|
|
|
|
|
|
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The offset is first converted to a whole number of milliseconds (rounded
    to the nearest one) and only then split into fields. Working on an
    integer avoids the floating-point truncation that would otherwise turn
    e.g. 1.001 s into ``00:00:01,000``, and lets a value such as 59.9996 s
    carry cleanly into the next second/minute.

    Args:
        seconds: Offset from the start of the media, in seconds (int or float).
            Negative values are clamped to zero because SRT timestamps cannot
            be negative.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for 3661.5.
    """
    total_millis = max(0, int(round(seconds * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
|
|
|
|
|
|
def wrap_text(text, max_len=40):
    """Wrap text to roughly ``max_len`` characters per line without cutting words.

    Words are taken from ``text.split()``, so any run of whitespace
    (including existing newlines) is treated as a single separator. A word is
    appended to the current line when the line, one separating space and the
    word together fit within ``max_len``; otherwise a new line is started.

    A single word longer than ``max_len`` is kept intact on its own line. The
    first word of a line is always accepted, so the result never starts with
    (or contains) an empty line -- an empty line inside an SRT cue would end
    the cue prematurely.

    Args:
        text: The text to wrap.
        max_len: Target maximum number of characters per line.

    Returns:
        The wrapped text with lines joined by ``"\\n"``; ``""`` when ``text``
        contains no words.
    """
    lines = []
    current_line = ""

    for word in text.split():
        if not current_line:
            # Starting a line: no separator is needed, and the word is taken
            # even if it is longer than max_len on its own.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_len:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word

    if current_line:
        lines.append(current_line)

    return "\n".join(lines)
|
|
|
|
|
|
def write_srt(subtitles, output_path):
    """Write subtitle cues to ``output_path`` in SRT format (UTF-8).

    Each cue is emitted as its 1-based index, a ``start --> end`` timing line
    built with ``format_time``, the cue text passed through ``wrap_text``, and
    a blank separator line.

    Args:
        subtitles: Iterable of dicts with ``'start'``, ``'end'`` and ``'text'``
            keys.
        output_path: Destination file path; an existing file is overwritten.
    """
    with open(output_path, 'w', encoding='utf-8') as srt_file:
        for cue_number, cue in enumerate(subtitles, 1):
            timing_line = f"{format_time(cue['start'])} --> {format_time(cue['end'])}"
            cue_text = wrap_text(cue['text'])
            srt_file.write(f"{cue_number}\n{timing_line}\n{cue_text}\n\n")
|
|
|
|
|
|
def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, model_size="base", language="auto"):
    """Transcribe the speech in a video and save it as an SRT subtitle file.

    The audio track is extracted to a temporary WAV file, transcribed with a
    faster-whisper ``WhisperModel`` running on CPU (int8), and every non-empty
    segment is written to ``srt_output_path`` via ``write_srt``.

    Args:
        video_path: Path of the video file to read.
        srt_output_path: Path of the SRT file to create.
        progress_callback: Optional callable ``(current, total)`` invoked once
            per processed segment, where ``total`` is the number of segments.
        model_size: Whisper model name passed to ``WhisperModel``.
        language: Language code passed to the model, or ``"auto"`` to let the
            model detect the language.

    Returns:
        ``True`` when the SRT file was written, ``False`` when any step
        failed (the error is printed to the console).
    """
    # Initialised up front so the ``finally`` block can tell what was
    # actually acquired, whichever step fails.
    video = None
    audio = None
    temp_audio_path = None

    try:
        print("📽️ Loading video file...")
        video = mp.VideoFileClip(video_path)
        audio = video.audio
        if audio is None:
            raise ValueError("The selected video has no audio track to transcribe.")

        # Only the unique path is needed; the handle is closed before the
        # audio is written so the file is never opened twice at once.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name

        print("🔊 Extracting audio...")
        audio.write_audiofile(temp_audio_path, logger=None)

        print(f"🤖 Loading Whisper model ({model_size})...")
        model = WhisperModel(model_size, device="cpu", compute_type="int8")

        print("🎯 Transcribing with Whisper AI...")
        # The whole file is transcribed in one call; segment timings come
        # from the model.
        segments, info = model.transcribe(
            temp_audio_path,
            language=None if language == "auto" else language,
            word_timestamps=True,
            vad_filter=True,  # Voice Activity Detection for better accuracy
            vad_parameters=dict(min_silence_duration_ms=500),
        )

        print(f"🌍 Detected language: {info.language} (confidence: {info.language_probability:.2f})")

        subtitles = []
        # Materialise the segment generator so the total count is known for
        # progress reporting.
        segment_list = list(segments)

        print(f"📝 Processing {len(segment_list)} speech segments...")

        for i, segment in enumerate(segment_list):
            start_time = segment.start
            end_time = segment.end
            text = segment.text.strip()

            if text:
                subtitles.append({
                    "start": start_time,
                    "end": end_time,
                    "text": text,
                })
                print(f"✅ Segment {i+1}: '{text[:50]}...' ({start_time:.1f}s - {end_time:.1f}s)")

            if progress_callback:
                progress_callback(i + 1, len(segment_list))

        print(f"🎯 Generated {len(subtitles)} subtitle segments with Whisper AI")
        write_srt(subtitles, srt_output_path)
        return True

    except Exception as e:
        # Top-level boundary for the GUI: report the failure and signal it
        # through the return value instead of propagating.
        print(f"❌ Error: {e}")
        return False

    finally:
        # Cleanup runs on success AND failure so a crash mid-way does not
        # leave the temporary WAV or open media handles behind.
        for clip in (video, audio):
            if clip is not None:
                try:
                    clip.close()
                except Exception as close_error:
                    print(f"⚠️ Could not close media resource: {close_error}")

        if temp_audio_path and os.path.exists(temp_audio_path):
            try:
                os.remove(temp_audio_path)
            except OSError as remove_error:
                print(f"⚠️ Could not remove temporary audio file: {remove_error}")
|
|
|
|
|
|
# -------------------- GUI --------------------
|
|
|
|
def select_file_and_generate():
    """Ask for a video and an SRT destination, then transcribe in the background.

    The transcription runs in a daemon thread so the window stays responsive.
    Tkinter widgets are only ever touched from the main thread: the worker
    posts progress and completion events to a queue, and the main thread
    drains that queue periodically via ``root.after``. The Tk variables
    holding the model size and language are likewise read before the worker
    starts.

    Returns immediately (doing nothing) if either file dialog is cancelled.
    """
    # Local import: the file's import block does not provide ``queue``.
    import queue

    poll_interval_ms = 100

    video_path = filedialog.askopenfilename(
        title="Select a video file",
        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.webm *.flv")]
    )
    if not video_path:
        return

    srt_output_path = filedialog.asksaveasfilename(
        title="Save SRT subtitles as...",
        defaultextension=".srt",
        filetypes=[("Subtitle files", "*.srt")]
    )
    if not srt_output_path:
        return

    # Read Tk variables here, on the main thread, not inside the worker.
    model_size = model_size_var.get()
    language = language_var.get()

    # Disable the button while a job is running.
    select_button.config(state="disabled", text="Processing...")
    progress_bar["value"] = 0
    progress_label.config(text="Starting speech recognition...")
    status_label.config(text="🔄 Processing video...", fg="blue")
    # Repaint now so the new state is visible before the heavy work starts.
    root.update_idletasks()

    events = queue.Queue()

    def update_progress(current, total):
        # Called from the worker thread: just enqueue, never touch widgets.
        events.put(("progress", current, total))

    def process_video():
        # Worker thread body: run the transcription and report the outcome.
        try:
            success = transcribe_video_to_srt(
                video_path,
                srt_output_path,
                progress_callback=update_progress,
                model_size=model_size,
                language=language
            )
            events.put(("done", success, None))
        except Exception as exc:
            events.put(("done", False, exc))

    def show_progress(current, total):
        # Main thread: reflect one progress event in the widgets.
        percent = (current / total) * 100 if total else 0
        progress_bar["value"] = percent
        progress_label.config(text=f"Processing: {current}/{total} segments")

    def finish(success, error):
        # Main thread: report the final result and re-enable the button.
        try:
            if error is not None:
                status_label.config(text="❌ Unexpected error occurred", fg="red")
                messagebox.showerror("Error", f"Unexpected error: {error}")
            elif success:
                status_label.config(text="✅ Subtitles generated successfully!", fg="green")
                messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}\n\nOpen with your subtitle_generator2.py for editing!")
            else:
                status_label.config(text="❌ Error occurred during processing", fg="red")
                messagebox.showerror("Error", "Something went wrong. Check console for details.")
        finally:
            select_button.config(state="normal", text="📂 Select Video and Generate Subtitles")
            progress_label.config(text="Done")

    def poll_events():
        # Main thread: drain everything the worker has posted so far, then
        # reschedule unless the job has completed.
        finished = False
        while not finished:
            try:
                event = events.get_nowait()
            except queue.Empty:
                break
            if event[0] == "progress":
                show_progress(event[1], event[2])
            else:
                finished = True
                finish(event[1], event[2])
        if not finished:
            root.after(poll_interval_ms, poll_events)

    # Run in a separate thread to prevent the GUI from freezing.
    thread = threading.Thread(target=process_video)
    thread.daemon = True
    thread.start()
    root.after(poll_interval_ms, poll_events)
|
|
|
|
|
|
# ---- Main window -----------------------------------------------------------
root = tk.Tk()
root.wm_title("🎬 Auto Subtitle Generator - Speech to SRT")
root.wm_geometry("500x350")

# Outer container that pads all content away from the window edges.
frame = tk.Frame(root, padx=20, pady=20)
frame.pack(fill="both", expand=True)

# ---- Heading ---------------------------------------------------------------
title_label = tk.Label(
    frame,
    text="🎬 Auto Subtitle Generator",
    font=("Arial", 16, "bold"),
)
title_label.pack(pady=(0, 10))

subtitle_label = tk.Label(
    frame,
    text="Extract speech from video and create perfectly timed SRT subtitles",
    font=("Arial", 10),
)
subtitle_label.pack(pady=(0, 20))

# ---- Whisper settings ------------------------------------------------------
settings_frame = tk.LabelFrame(
    frame,
    text="Whisper AI Settings",
    padx=10,
    pady=10,
)
settings_frame.pack(fill="x", pady=(0, 15))

# Model size row: label on the left, read-only dropdown on the right.
model_frame = tk.Frame(settings_frame)
model_frame.pack(fill="x", pady=(0, 10))

tk.Label(model_frame, text="🧠 Model Size:", font=("Arial", 9)).pack(side="left")
model_size_var = tk.StringVar(value="base")
model_dropdown = ttk.Combobox(
    model_frame,
    textvariable=model_size_var,
    values=["tiny", "base", "small", "medium", "large"],
    state="readonly",
    width=12,
)
model_dropdown.pack(side="right")

# Language row: same layout as the model size row.
language_frame = tk.Frame(settings_frame)
language_frame.pack(fill="x", pady=(0, 10))

tk.Label(language_frame, text="🌍 Language:", font=("Arial", 9)).pack(side="left")
language_var = tk.StringVar(value="auto")
language_dropdown = ttk.Combobox(
    language_frame,
    textvariable=language_var,
    values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
    state="readonly",
    width=12,
)
language_dropdown.pack(side="right")

# Short hints shown underneath the two dropdowns.
help_label = tk.Label(
    settings_frame,
    text=" 💡 Base model recommended for best speed/accuracy balance\n 🔍 Auto language detection works for most videos",
    font=("Arial", 8),
    fg="gray",
)
help_label.pack(anchor="w")

# ---- Action button ---------------------------------------------------------
# Starts the whole select-file / transcribe / save workflow.
select_button = tk.Button(
    frame,
    text="📂 Select Video and Generate Subtitles",
    command=select_file_and_generate,
    font=("Arial", 11, "bold"),
    bg="#4CAF50",
    fg="white",
    pady=8,
)
select_button.pack(pady=15, fill="x")

# ---- Progress display ------------------------------------------------------
progress_frame = tk.LabelFrame(frame, text="Progress", padx=10, pady=10)
progress_frame.pack(fill="x", pady=(0, 10))

progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
progress_bar.pack(fill="x", pady=(0, 5))

progress_label = tk.Label(
    progress_frame,
    text="Ready to process video",
    font=("Arial", 9),
)
progress_label.pack()

# ---- Status line -----------------------------------------------------------
status_label = tk.Label(
    frame,
    text="💡 Tip: Use generated SRT files with subtitle_generator2.py for advanced editing!",
    font=("Arial", 9),
    fg="blue",
)
status_label.pack(pady=(10, 0))

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()
|