# ShortGenerator/subtitle_generator.py

import math
import os
import tempfile
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import moviepy as mp
from faster_whisper import WhisperModel
from pydub import AudioSegment
from pydub.silence import split_on_silence
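
# Dependency assumptions (not pinned here):
# - `import moviepy as mp` follows the moviepy 2.x layout, where VideoFileClip is
#   available at the package top level; moviepy 1.x exposes it via `moviepy.editor`.
# - faster-whisper downloads the requested Whisper model on first use.
# - Audio extraction needs an ffmpeg binary reachable by moviepy (usually provided
#   by the imageio-ffmpeg package).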


def format_time(seconds):
    """Convert a time in seconds to an SRT timestamp (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    millis = int((seconds - int(seconds)) * 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def wrap_text(text, max_len=40):
    """
    Wraps text to ~max_len characters per line without cutting words.
    """
    words = text.split()
    lines = []
    current_line = ""
    for word in words:
        if len(current_line + " " + word) <= max_len:
            current_line += (" " if current_line else "") + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)


def write_srt(subtitles, output_path):
    """Write a list of {'start', 'end', 'text'} dicts to an SRT file."""
    with open(output_path, 'w', encoding='utf-8') as f:
        for i, sub in enumerate(subtitles, 1):
            f.write(f"{i}\n")
            f.write(f"{format_time(sub['start'])} --> {format_time(sub['end'])}\n")
            f.write(f"{wrap_text(sub['text'])}\n\n")


def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, model_size="base", language="auto"):
    try:
        print("📽️ Loading video file...")
        video = mp.VideoFileClip(video_path)
        audio = video.audio
        # Reserve a temp .wav path; the file handle is closed immediately so
        # moviepy can write to it (important on Windows).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name
        print("🔊 Extracting audio...")
        audio.write_audiofile(temp_audio_path, logger=None)
        print(f"🤖 Loading Whisper model ({model_size})...")
        # Initialize the Whisper model - much more accurate than Google Speech Recognition
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        print("🎯 Transcribing with Whisper AI...")
        # Transcribe the entire audio file at once - Whisper handles timing automatically
        segments, info = model.transcribe(
            temp_audio_path,
            language=None if language == "auto" else language,
            word_timestamps=True,
            vad_filter=True,  # Voice Activity Detection for better accuracy
            vad_parameters=dict(min_silence_duration_ms=500)
        )
        print(f"🌍 Detected language: {info.language} (confidence: {info.language_probability:.2f})")
        subtitles = []
        segment_list = list(segments)  # Convert the generator to a list for progress tracking
        print(f"📝 Processing {len(segment_list)} speech segments...")
        for i, segment in enumerate(segment_list):
            # Whisper provides precise timing and text
            start_time = segment.start
            end_time = segment.end
            text = segment.text.strip()
            if text:
                subtitles.append({
                    "start": start_time,
                    "end": end_time,
                    "text": text
                })
                print(f"✅ Segment {i+1}: '{text[:50]}...' ({start_time:.1f}s - {end_time:.1f}s)")
            # Update the progress bar
            if progress_callback:
                progress_callback(i + 1, len(segment_list))
        # Clean up
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        if video:
            video.close()
        if audio:
            audio.close()
        print(f"🎯 Generated {len(subtitles)} subtitle segments with Whisper AI")
        write_srt(subtitles, srt_output_path)
        return True
    except Exception as e:
        print(f"❌ Error: {e}")
        return False
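
# Illustrative headless use (hypothetical file names), skipping the GUI below:
#   transcribe_video_to_srt("clip.mp4", "clip.srt", model_size="base", language="en")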


# -------------------- GUI --------------------
def select_file_and_generate():
    video_path = filedialog.askopenfilename(
        title="Select a video file",
        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.webm *.flv")]
    )
    if not video_path:
        return
    srt_output_path = filedialog.asksaveasfilename(
        title="Save SRT subtitles as...",
        defaultextension=".srt",
        filetypes=[("Subtitle files", "*.srt")]
    )
    if not srt_output_path:
        return
    # Disable the button during processing
    select_button.config(state="disabled", text="Processing...")
    progress_bar["value"] = 0
    progress_label.config(text="Starting speech recognition...")
    status_label.config(text="🔄 Processing video...", fg="blue")
    root.update()

    def update_progress(current, total):
        # Note: this runs on the worker thread. Tkinter is not strictly
        # thread-safe, so a more robust version would schedule these
        # widget updates via root.after().
        percent = (current / total) * 100
        progress_bar["value"] = percent
        progress_label.config(text=f"Processing: {current}/{total} segments")
        root.update()

    def process_video():
        try:
            model_size = model_size_var.get()
            language = language_var.get()
            success = transcribe_video_to_srt(
                video_path,
                srt_output_path,
                progress_callback=update_progress,
                model_size=model_size,
                language=language
            )
            if success:
                status_label.config(text="✅ Subtitles generated successfully!", fg="green")
                messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}\n\nOpen it with subtitle_generator2.py for editing!")
            else:
                status_label.config(text="❌ Error occurred during processing", fg="red")
                messagebox.showerror("Error", "Something went wrong. Check the console for details.")
        except Exception as e:
            status_label.config(text="❌ Unexpected error occurred", fg="red")
            messagebox.showerror("Error", f"Unexpected error: {e}")
        finally:
            # Re-enable the button
            select_button.config(state="normal", text="📂 Select Video and Generate Subtitles")
            progress_label.config(text="Done")

    # Run in a separate thread so the GUI does not freeze
    thread = threading.Thread(target=process_video)
    thread.daemon = True
    thread.start()


# GUI Setup
root = tk.Tk()
root.title("🎬 Auto Subtitle Generator - Speech to SRT")
root.geometry("500x350")

frame = tk.Frame(root, padx=20, pady=20)
frame.pack(fill="both", expand=True)

# Title
title_label = tk.Label(frame, text="🎬 Auto Subtitle Generator", font=("Arial", 16, "bold"))
title_label.pack(pady=(0, 10))
subtitle_label = tk.Label(frame, text="Extract speech from video and create perfectly timed SRT subtitles", font=("Arial", 10))
subtitle_label.pack(pady=(0, 20))

# Settings Frame
settings_frame = tk.LabelFrame(frame, text="Whisper AI Settings", padx=10, pady=10)
settings_frame.pack(fill="x", pady=(0, 15))

# Model Size Selection
model_frame = tk.Frame(settings_frame)
model_frame.pack(fill="x", pady=(0, 10))
tk.Label(model_frame, text="🧠 Model Size:", font=("Arial", 9)).pack(side="left")
model_size_var = tk.StringVar(value="base")
model_dropdown = ttk.Combobox(
    model_frame,
    textvariable=model_size_var,
    values=["tiny", "base", "small", "medium", "large"],
    state="readonly",
    width=12
)
model_dropdown.pack(side="right")

# Language Selection
language_frame = tk.Frame(settings_frame)
language_frame.pack(fill="x", pady=(0, 10))
tk.Label(language_frame, text="🌍 Language:", font=("Arial", 9)).pack(side="left")
language_var = tk.StringVar(value="auto")
language_dropdown = ttk.Combobox(
    language_frame,
    textvariable=language_var,
    values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
    state="readonly",
    width=12
)
language_dropdown.pack(side="right")

# Help text
help_label = tk.Label(
    settings_frame,
    text=" 💡 Base model recommended for the best speed/accuracy balance\n 🔍 Auto language detection works for most videos",
    font=("Arial", 8),
    fg="gray"
)
help_label.pack(anchor="w")

# Main Action Button
select_button = tk.Button(
    frame,
    text="📂 Select Video and Generate Subtitles",
    command=select_file_and_generate,
    font=("Arial", 11, "bold"),
    bg="#4CAF50",
    fg="white",
    pady=8
)
select_button.pack(pady=15, fill="x")

# Progress Section
progress_frame = tk.LabelFrame(frame, text="Progress", padx=10, pady=10)
progress_frame.pack(fill="x", pady=(0, 10))
progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
progress_bar.pack(fill="x", pady=(0, 5))
progress_label = tk.Label(progress_frame, text="Ready to process video", font=("Arial", 9))
progress_label.pack()

# Status Label
status_label = tk.Label(frame, text="💡 Tip: Use generated SRT files with subtitle_generator2.py for advanced editing!", font=("Arial", 9), fg="blue")
status_label.pack(pady=(10, 0))

root.mainloop()