ShortGenerator/shorts_generator2.py

import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa

class ToolTip:
    """Create a tooltip for a given widget"""
    def __init__(self, widget, text='widget info', side='right'):
        self.widget = widget
        self.text = text
        self.side = side
        self.widget.bind("<Enter>", self.enter)
        self.widget.bind("<Leave>", self.leave)
        self.tipwindow = None

    def enter(self, event=None):
        self.showtip()

    def leave(self, event=None):
        self.hidetip()

    def showtip(self):
        if self.tipwindow or not self.text:
            return

        # Get widget position
        x = self.widget.winfo_rootx()
        y = self.widget.winfo_rooty()
        w = self.widget.winfo_width()
        h = self.widget.winfo_height()

        # Position tooltip based on side preference
        if self.side == 'right':
            x = x + w + 10  # 10px to the right of widget
            y = y
        else:
            x = x + 25
            y = y + h + 5

        self.tipwindow = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)
        tw.wm_geometry("+%d+%d" % (x, y))
        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
                        background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                        font=("Arial", "9", "normal"), wraplength=350)
        label.pack(ipadx=5, ipady=3)

    def hidetip(self):
        tw = self.tipwindow
        self.tipwindow = None
        if tw:
            tw.destroy()

class ComboboxTooltip:
    """Special tooltip for combobox that shows on listbox hover"""
    def __init__(self, combobox, descriptions):
        self.combobox = combobox
        self.descriptions = descriptions
        self.tipwindow = None
        self.bound_listbox = None

        # Bind to combobox events
        self.combobox.bind("<Button-1>", self.on_click)
        self.combobox.bind("<KeyPress>", self.on_keypress)

    def on_click(self, event):
        # Try to find the listbox when dropdown opens
        self.combobox.after(50, self.bind_listbox)

    def on_keypress(self, event):
        # Handle keyboard navigation
        self.combobox.after(50, self.bind_listbox)

    def bind_listbox(self):
        # Find the listbox widget more reliably
        try:
            # Look through all toplevel windows for the combobox popdown
            for window in self.combobox.winfo_toplevel().winfo_children():
                window_class = window.winfo_class()
                if window_class == 'Toplevel':
                    # Found a toplevel, look for listbox inside
                    for child in window.winfo_children():
                        if child.winfo_class() == 'Listbox':
                            if self.bound_listbox != child:
                                self.bound_listbox = child
                                child.bind("<Motion>", self.on_listbox_motion)
                                child.bind("<Leave>", self.on_listbox_leave)
                                child.bind("<ButtonRelease-1>", self.on_listbox_leave)
                            return
        except Exception as e:
            # Fallback method - try to find any listbox
            try:
                # Alternative approach: look for the popdown frame
                for child in self.combobox.tk.call('winfo', 'children', '.'):
                    if 'popdown' in str(child):
                        popdown = self.combobox.nametowidget(child)
                        for subchild in popdown.winfo_children():
                            if subchild.winfo_class() == 'Listbox':
                                if self.bound_listbox != subchild:
                                    self.bound_listbox = subchild
                                    subchild.bind("<Motion>", self.on_listbox_motion)
                                    subchild.bind("<Leave>", self.on_listbox_leave)
                                    subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
                                return
            except:
                pass

    def on_listbox_motion(self, event):
        try:
            listbox = event.widget
            index = listbox.nearest(event.y)
            if 0 <= index < len(self.combobox['values']):
                selection = self.combobox['values'][index]
                if selection in self.descriptions:
                    self.show_tooltip(event, self.descriptions[selection])
        except Exception:
            pass

    def on_listbox_leave(self, event):
        self.hide_tooltip()

    def show_tooltip(self, event, text):
        self.hide_tooltip()  # Hide any existing tooltip

        try:
            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
            y = event.widget.winfo_rooty() + event.y - 20

            self.tipwindow = tw = tk.Toplevel(event.widget)
            tw.wm_overrideredirect(True)
            tw.wm_geometry("+%d+%d" % (x, y))
            label = tk.Label(tw, text=text, justify=tk.LEFT,
                            background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                            font=("Arial", "9", "normal"), wraplength=350)
            label.pack(ipadx=5, ipady=3)
        except Exception:
            pass

    def hide_tooltip(self):
        if self.tipwindow:
            try:
                self.tipwindow.destroy()
            except:
                pass
            self.tipwindow = None

def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i+chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks

def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
    """Detect dramatic visual scene changes"""
    print("🎬 Analyzing scene changes...")
    clip = VideoFileClip(video_path)

    # Sample frames at regular intervals
    sample_rate = 2  # Check every 2 seconds
    times = np.arange(0, clip.duration, sample_rate)

    scene_changes = []
    prev_frame = None

    for i, t in enumerate(times[:-1]):
        try:
            # Get current and next frame
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(times[i + 1])

            # Convert to grayscale and resize for faster processing
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
            gray2 = cv2.resize(gray2, (160, 90))

            # Calculate structural similarity difference
            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0

            if diff > threshold:
                start = max(0, t - chunk_duration/2)
                end = min(clip.duration, t + chunk_duration/2)
                scene_changes.append((start, end))

        except Exception as e:
            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
            continue

    print(f"🎬 Found {len(scene_changes)} scene changes")
    clip.close()
    return scene_changes

def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
    """Detect high motion/action moments"""
    print("🏃 Analyzing motion intensity...")
    clip = VideoFileClip(video_path)

    sample_rate = 1  # Check every second
    times = np.arange(0, clip.duration - 1, sample_rate)

    motion_moments = []

    for t in times:
        try:
            # Get two consecutive frames
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(t + 0.5)  # Half second later

            # Convert to grayscale and resize
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))
            gray2 = cv2.resize(gray2, (160, 90))

            # Calculate optical flow magnitude
            flow = cv2.calcOpticalFlowPyrLK(gray1, gray2,
                                           np.random.randint(0, 160, (100, 1, 2)).astype(np.float32),
                                           None)[0]

            if flow is not None:
                motion_magnitude = np.mean(np.linalg.norm(flow.reshape(-1, 2), axis=1))

                if motion_magnitude > threshold:
                    start = max(0, t - chunk_duration/2)
                    end = min(clip.duration, t + chunk_duration/2)
                    motion_moments.append((start, end))

        except Exception as e:
            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
            continue

    print(f"🏃 Found {len(motion_moments)} high-motion moments")
    clip.close()
    return motion_moments

def detect_speech_emotion(video_path, chunk_duration=5):
    """Detect emotional/excited speech patterns"""
    print("😄 Analyzing speech emotions...")

    # Use Whisper to get detailed speech analysis
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)

    emotional_moments = []
    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
                          'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']

    for segment in segments:
        text = segment.text.lower()

        # Check for excitement keywords
        has_keywords = any(keyword in text for keyword in excitement_keywords)

        # Check for multiple exclamation-worthy patterns
        has_caps = any(word.isupper() for word in segment.text.split())
        has_punctuation = '!' in segment.text or '?' in segment.text
        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)

        if has_keywords or has_punctuation or is_short_excited:
            start = max(0, segment.start - chunk_duration/2)
            end = min(segment.end + chunk_duration/2, segment.end + chunk_duration)
            emotional_moments.append((start, end))

    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
    return emotional_moments

def detect_audio_peaks(video_path, chunk_duration=5):
    """Detect sudden audio peaks (bass drops, beats, impacts)"""
    print("🎵 Analyzing audio peaks...")

    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed

    # Convert to mono if stereo
    if len(audio.shape) > 1:
        audio = np.mean(audio, axis=1)

    # Find spectral peaks (bass, treble spikes)
    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))

    peak_moments = []
    prev_peak = 0

    for peak in peaks:
        peak_time = peak / 22050

        # Avoid too close peaks
        if peak_time - prev_peak > chunk_duration:
            start = max(0, peak_time - chunk_duration/2)
            end = min(clip.duration, peak_time + chunk_duration/2)
            peak_moments.append((start, end))
            prev_peak = peak_time

    print(f"🎵 Found {len(peak_moments)} audio peak moments")
    clip.close()
    return peak_moments

def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
    """Combine multiple detection methods for best moments"""
    print("🎯 Running comprehensive moment analysis...")

    if weights is None:
        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

    # Get all detection results
    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
    scene_moments = detect_scene_changes(video_path, chunk_duration)
    motion_moments = detect_motion_intensity(video_path, chunk_duration)
    speech_moments = detect_speech_emotion(video_path, chunk_duration)
    peak_moments = detect_audio_peaks(video_path, chunk_duration)

    # Create time-based scoring
    clip = VideoFileClip(video_path)
    duration = clip.duration
    clip.close()

    # Score each second of the video
    time_scores = {}

    for moments, weight in [(loud_moments, weights['loud']),
                           (scene_moments, weights['scene']),
                           (motion_moments, weights['motion']),
                           (speech_moments, weights['speech']),
                           (peak_moments, weights['peaks'])]:
        for start, end in moments:
            for t in range(int(start), int(end) + 1):
                if t not in time_scores:
                    time_scores[t] = 0
                time_scores[t] += weight

    # Find the highest scoring segments
    if not time_scores:
        return loud_moments  # Fallback to loud moments

    # Get top scoring time periods
    sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)

    combined_moments = []
    used_times = set()

    for time_sec, score in sorted_times:
        if time_sec not in used_times and score > 0.3:  # Minimum threshold
            start = max(0, time_sec - chunk_duration/2)
            end = min(duration, time_sec + chunk_duration/2)
            combined_moments.append((start, end))

            # Mark nearby times as used to avoid overlap
            for t in range(max(0, time_sec - chunk_duration),
                          min(int(duration), time_sec + chunk_duration)):
                used_times.add(t)

    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
    return combined_moments

def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles

def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)
    clips = [vertical_clip]

    subtitle_y_px = 1550  # Fixed Y position for subtitles

    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)

            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            words = text.strip().split()
            if not words:
                continue

            # Split into small readable chunks (max ~3-4 words)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)

            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)

                chunk_words = chunk_text.split()

                # Base subtitle
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)

                # Highlighted words (perfectly aligned)
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is center X of 1080px width

                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    highlighted_word = TextClip(
                        text=word.upper(),
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size

                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x -125
                    , subtitle_y_px))
                    clips.append(highlighted_word)

                    current_x += word_width + 20  # Add spacing between words

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()

def validate_video(video_path, min_duration=30):
    """Validate video file and return duration"""
    try:
        clip = VideoFileClip(video_path)
        duration = clip.duration
        clip.close()

        if duration < min_duration:
            raise ValueError(f"Video is too short ({duration:.1f}s). Minimum {min_duration}s required.")

        return duration
    except Exception as e:
        if "No such file" in str(e):
            raise FileNotFoundError(f"Video file not found: {video_path}")
        elif "could not open" in str(e).lower():
            raise ValueError(f"Invalid or corrupted video file: {video_path}")
        else:
            raise ValueError(f"Error reading video: {str(e)}")

def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
                   threshold_db=-30, clip_duration=5, detection_mode="loud"):
    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
    try:
        video_duration = validate_video(video_path, min_duration=clip_duration * 2)
        if progress_callback:
            progress_callback(f"✅ Video validated ({video_duration:.1f}s)", 5)
    except Exception as e:
        if progress_callback:
            progress_callback(f"❌ Video validation failed", 0)
        raise e

    # Choose detection method based on mode
    if detection_mode == "loud":
        if progress_callback:
            progress_callback("🔍 Analyzing audio for loud moments...", 10)
        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
    elif detection_mode == "scene":
        if progress_callback:
            progress_callback("🎬 Analyzing scene changes...", 10)
        best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
    elif detection_mode == "motion":
        if progress_callback:
            progress_callback("🏃 Analyzing motion intensity...", 10)
        best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
    elif detection_mode == "speech":
        if progress_callback:
            progress_callback("😄 Analyzing speech emotions...", 10)
        best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
    elif detection_mode == "peaks":
        if progress_callback:
            progress_callback("🎵 Analyzing audio peaks...", 10)
        best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
    elif detection_mode == "combined":
        if progress_callback:
            progress_callback("🎯 Running comprehensive analysis...", 10)
        best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
    else:
        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)

    selected = best_moments[:max_clips]

    if not selected:
        mode_name = {
            "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
            "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
        }.get(detection_mode, "moments")
        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")

    if progress_callback:
        progress_callback(f"📊 Found {len(selected)} clips to generate", 20)

    for i, (start, end) in enumerate(selected):
        if progress_callback:
            progress_callback(f"🗣️ Transcribing clip {i+1}/{len(selected)}", 30 + (i * 20))

        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")

        if progress_callback:
            progress_callback(f"🎬 Creating video {i+1}/{len(selected)}", 50 + (i * 20))

        create_short_clip(video_path, start, end, subtitles, out_path)

    if progress_callback:
        progress_callback("✅ All shorts generated successfully!", 100)

# GUI Components
class ShortsGeneratorGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
        self.root.geometry("650x650")  # Reduced height to eliminate empty space
        self.root.minsize(600, 600)  # Reduced minimum size

        self.video_path = None
        self.output_folder = "shorts"
        self.max_clips = 3
        self.threshold_db = -30
        self.clip_duration = 5

        self.create_widgets()

    def create_widgets(self):
        # Title
        title_label = tk.Label(self.root, text="🎬 AI Shorts Generator", font=("Arial", 16, "bold"))
        title_label.pack(pady=10)

        # Video selection
        video_frame = tk.Frame(self.root)
        video_frame.pack(pady=10, padx=20, fill="x")

        tk.Label(video_frame, text="Select Video File:").pack(anchor="w")
        video_select_frame = tk.Frame(video_frame)
        video_select_frame.pack(fill="x", pady=5)

        self.video_label = tk.Label(video_select_frame, text="No video selected", bg="white", relief="sunken")
        self.video_label.pack(side="left", fill="x", expand=True, padx=(0, 5))

        tk.Button(video_select_frame, text="Browse", command=self.select_video).pack(side="right")

        # Output folder selection
        output_frame = tk.Frame(self.root)
        output_frame.pack(pady=10, padx=20, fill="x")

        tk.Label(output_frame, text="Output Folder:").pack(anchor="w")
        output_select_frame = tk.Frame(output_frame)
        output_select_frame.pack(fill="x", pady=5)

        self.output_label = tk.Label(output_select_frame, text="shorts/", bg="white", relief="sunken")
        self.output_label.pack(side="left", fill="x", expand=True, padx=(0, 5))

        tk.Button(output_select_frame, text="Browse", command=self.select_output_folder).pack(side="right")

        # Settings frame
        settings_frame = tk.LabelFrame(self.root, text="Settings", padx=10, pady=10)
        settings_frame.pack(pady=10, padx=20, fill="x")

        # Max clips with on/off toggle
        clips_frame = tk.Frame(settings_frame)
        clips_frame.pack(fill="x", pady=5)

        clips_left_frame = tk.Frame(clips_frame)
        clips_left_frame.pack(side="left")

        self.use_max_clips = tk.BooleanVar(value=True)
        clips_checkbox = tk.Checkbutton(clips_left_frame, variable=self.use_max_clips, text="Max Clips to Generate:")
        clips_checkbox.pack(side="left")

        self.clips_var = tk.IntVar(value=3)
        self.clips_spinbox = tk.Spinbox(clips_frame, from_=1, to=10, width=5, textvariable=self.clips_var)
        self.clips_spinbox.pack(side="right")

        # Bind checkbox to enable/disable spinbox
        def toggle_clips_limit():
            if self.use_max_clips.get():
                self.clips_spinbox.config(state="normal")
            else:
                self.clips_spinbox.config(state="disabled")

        self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
        clips_checkbox.config(command=toggle_clips_limit)

        # Add tooltip for max clips setting
        clips_tooltip_text = """Max Clips Control:

• Checked: Limit the number of clips generated
• Unchecked: Generate all detected moments
• 1-3 clips: Quick highlights for social media
• 4-6 clips: Good variety pack
• 7-10 clips: Comprehensive highlight reel

Tip: Start with 3 clips, then increase if you want more content"""
        ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
        ToolTip(clips_checkbox, clips_tooltip_text, side='right')

        # Detection Mode Selection
        detection_frame = tk.Frame(settings_frame)
        detection_frame.pack(fill="x", pady=5)
        tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")

        self.detection_mode_var = tk.StringVar(value="loud")
        self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")

        detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
                                          values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
                                                "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
                                          state="readonly", width=22)
        detection_dropdown.pack(side="right")

        # Store the mapping between display text and internal values
        self.mode_mapping = {
            "🔊 Loud Moments": "loud",
            "🎬 Scene Changes": "scene",
            "🏃 Motion Intensity": "motion",
            "😄 Emotional Speech": "speech",
            "🎵 Audio Peaks": "peaks",
            "🎯 Smart Combined": "combined"
        }

        # Simple, clear descriptions for mode tooltips
        mode_descriptions = {
            "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.

• Best for: Gaming reactions, music highlights, shouting moments
• Finds: High-volume audio segments above the threshold
• Ideal when: Your video has clear volume differences
• Tip: Adjust threshold if too many/few moments found""",

            "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.

• Best for: Movie trailers, montages, location changes
• Finds: Major visual shifts between frames
• Ideal when: Video has multiple scenes or camera angles
• Tip: Great for content with quick cuts or transitions""",

            "🏃 Motion Intensity": """Analyzes movement and action within video frames.

• Best for: Sports highlights, dance videos, action scenes
• Finds: High-movement moments with lots of visual activity
• Ideal when: Video contains physical action or movement
• Tip: Perfect for extracting the most dynamic moments""",

            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.

• Best for: Reactions, reviews, commentary, tutorials
• Finds: Words like 'wow', 'amazing', exclamations, excited tone
• Ideal when: Video has spoken content with emotional moments
• Tip: Captures the most engaging verbal reactions""",

            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.

• Best for: Music videos, sound effect moments, beat drops
• Finds: Sharp increases in audio frequency or volume
• Ideal when: Video has musical elements or sound effects
• Tip: Great for rhythm-based or audio-driven content""",

            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.

• Best for: Any video type, general content, unsure what to use
• Finds: Moments scoring high across multiple analysis methods
• Ideal when: You want the most 'interesting' overall moments
• Tip: Recommended starting point for most videos"""
        }

        # Create tooltip for the dropdown (updates when selection changes)
        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)

        # Update tooltip when selection changes
        def on_detection_change(event):
            selection = detection_dropdown.get()
            mode_map = {
                "🔊 Loud Moments": "loud",
                "🎬 Scene Changes": "scene",
                "🏃 Motion Intensity": "motion",
                "😄 Emotional Speech": "speech",
                "🎵 Audio Peaks": "peaks",
                "🎯 Smart Combined": "combined"
            }
            self.detection_mode_var.set(mode_map.get(selection, "loud"))

            # Update tooltip text for the selected mode
            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")

            # Show/hide threshold setting based on mode
            if selection == "🔊 Loud Moments":
                threshold_frame.pack(fill="x", pady=5)
            else:
                threshold_frame.pack_forget()

        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)

        # Audio threshold (only shown for loud moments)
        threshold_frame = tk.Frame(settings_frame)
        threshold_frame.pack(fill="x", pady=5)
        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
        threshold_label.pack(side="left")
        self.threshold_var = tk.IntVar(value=-30)
        threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
        threshold_spinbox.pack(side="right")

        # Add tooltip for threshold setting
        threshold_tooltip_text = """Audio Threshold Control:

• Higher values (closer to 0): Only very loud moments
• Lower values (closer to -50): More moments detected
• Default -30 dB: Good balance for most videos
• Adjust based on your video's audio levels

Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')

        # Clip duration (increased to 120 seconds max)
        duration_frame = tk.Frame(settings_frame)
        duration_frame.pack(fill="x", pady=5)
        duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
        duration_label.pack(side="left")
        self.duration_var = tk.IntVar(value=5)
        duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
        duration_spinbox.pack(side="right")

        # Add tooltip for duration setting
        duration_tooltip_text = """Clip Duration Setting:

• 3-10 seconds: Perfect for TikTok/Instagram Reels
• 10-30 seconds: Good for YouTube Shorts
• 30-60 seconds: Longer form highlights
• 60+ seconds: Extended content clips

Shorter clips = more viral potential
Longer clips = more context and story"""
        ToolTip(duration_spinbox, duration_tooltip_text, side='right')

        # Preview button
        self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                    command=self.preview_clips, bg="#2196F3", fg="white",
                                    font=("Arial", 10, "bold"), pady=5)
        self.preview_btn.pack(pady=5)

        # Add tooltip for preview button
        preview_tooltip_text = """Preview Clips Feature:

• Analyzes your video using the selected detection mode
• Shows all detected moments with timestamps
• Lets you select specific clips to generate
• No video files created - just analysis
• Great for testing settings before full generation

Tip: Always preview first to see what the AI finds!"""
        ToolTip(self.preview_btn, preview_tooltip_text, side='right')

        # Generate button
        self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                     command=self.start_generation, bg="#4CAF50", fg="white",
                                     font=("Arial", 12, "bold"), pady=10)
        self.generate_btn.pack(pady=10)

        # Add tooltip for generate button
        generate_tooltip_text = """Generate Shorts Feature:

• Creates actual video files from detected moments
• Adds AI-generated subtitles to each clip
• Formats videos for vertical social media (1080x1920)
• Saves clips to your selected output folder
• Takes longer but creates ready-to-post content

Tip: Use Preview first to fine-tune your settings!"""
        ToolTip(self.generate_btn, generate_tooltip_text, side='right')

        # Progress frame
        progress_frame = tk.Frame(self.root)
        progress_frame.pack(pady=5, padx=20, fill="x")

        self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
        self.progress_label.pack()

        self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
        self.progress_bar.pack(pady=3)

    def select_video(self):
        file_path = filedialog.askopenfilename(
            title="Select Video File",
            filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.wmv")]
        )
        if file_path:
            self.video_path = file_path
            self.video_label.config(text=os.path.basename(file_path))

    def select_output_folder(self):
        folder_path = filedialog.askdirectory(title="Select Output Folder")
        if folder_path:
            self.output_folder = folder_path
            self.output_label.config(text=folder_path)

    def preview_clips(self):
        if not self.video_path:
            messagebox.showwarning("Warning", "Please select a video file first!")
            return

        try:
            # Validate video first
            validate_video(self.video_path, min_duration=self.duration_var.get() * 2)

            # Analyze using selected detection mode
            self.preview_btn.config(state="disabled", text="Analyzing...")
            self.root.update()

            detection_mode = self.detection_mode_var.get()

            if detection_mode == "loud":
                moments = detect_loud_moments(
                    self.video_path,
                    chunk_duration=self.duration_var.get(),
                    threshold_db=self.threshold_var.get()
                )
                mode_name = "loud moments"
            elif detection_mode == "scene":
                moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "scene changes"
            elif detection_mode == "motion":
                moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "motion moments"
            elif detection_mode == "speech":
                moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "emotional speech"
            elif detection_mode == "peaks":
                moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "audio peaks"
            elif detection_mode == "combined":
                moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "interesting moments"
            else:
                moments = detect_loud_moments(
                    self.video_path,
                    chunk_duration=self.duration_var.get(),
                    threshold_db=self.threshold_var.get()
                )
                mode_name = "loud moments"

            if not moments:
                messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
                return

            # Show preview window
            preview_window = tk.Toplevel(self.root)
            preview_window.title("Preview and Select Clips")
            preview_window.geometry("500x400")

            tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)

            # Create scrollable frame for checkboxes
            canvas = tk.Canvas(preview_window)
            scrollbar = tk.Scrollbar(preview_window, orient="vertical", command=canvas.yview)
            scrollable_frame = tk.Frame(canvas)

            scrollable_frame.bind(
                "<Configure>",
                lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
            )

            canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
            canvas.configure(yscrollcommand=scrollbar.set)

            # Store checkbox variables and clip data
            self.clip_vars = []
            # Use all clips if max clips is disabled, otherwise limit by setting
            clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
            self.preview_clips_data = clips_to_show

            # Add selectable clips with checkboxes
            for i, (start, end) in enumerate(self.preview_clips_data, 1):
                duration = end - start
                time_str = f"Clip {i}: {start//60:02.0f}:{start%60:05.2f} - {end//60:02.0f}:{end%60:05.2f} ({duration:.1f}s)"

                clip_var = tk.BooleanVar(value=True)  # Default selected
                self.clip_vars.append(clip_var)

                clip_frame = tk.Frame(scrollable_frame)
                clip_frame.pack(fill="x", padx=10, pady=2)

                checkbox = tk.Checkbutton(clip_frame, variable=clip_var, text=time_str,
                                        font=("Courier", 10), anchor="w")
                checkbox.pack(fill="x")

            canvas.pack(side="left", fill="both", expand=True, padx=10, pady=5)
            scrollbar.pack(side="right", fill="y")

            # Button frame
            button_frame = tk.Frame(preview_window)
            button_frame.pack(fill="x", padx=10, pady=10)

            # Select/Deselect all buttons
            control_frame = tk.Frame(button_frame)
            control_frame.pack(fill="x", pady=5)

            tk.Button(control_frame, text="Select All",
                     command=lambda: [var.set(True) for var in self.clip_vars]).pack(side="left", padx=5)
            tk.Button(control_frame, text="Deselect All",
                     command=lambda: [var.set(False) for var in self.clip_vars]).pack(side="left", padx=5)

            # Generate selected clips button (fixed size for full text visibility)
            generate_selected_btn = tk.Button(button_frame, text="🎬 Generate Selected Clips",
                                            command=lambda: self.generate_selected_clips(preview_window),
                                            bg="#4CAF50", fg="white", font=("Arial", 11, "bold"),
                                            pady=8, width=25)
            generate_selected_btn.pack(fill="x", pady=5)

            # Close button
            tk.Button(button_frame, text="Close", command=preview_window.destroy).pack(pady=5)

        except Exception as e:
            messagebox.showerror("Preview Error", f"Error analyzing video: {str(e)}")
        finally:
            self.preview_btn.config(state="normal", text="🔍 Preview Clips")

    def generate_selected_clips(self, preview_window):
        """Generate only the selected clips from preview"""
        try:
            # Get selected clips
            selected_clips = []
            for i, (clip_var, clip_data) in enumerate(zip(self.clip_vars, self.preview_clips_data)):
                if clip_var.get():
                    selected_clips.append((i+1, clip_data))  # (clip_number, (start, end))

            if not selected_clips:
                messagebox.showwarning("Warning", "Please select at least one clip to generate!")
                return

            # Close preview window
            preview_window.destroy()

            # Show confirmation
            clip_count = len(selected_clips)
            clip_numbers = [str(num) for num, _ in selected_clips]
            confirm_msg = f"Generate {clip_count} selected clips (#{', #'.join(clip_numbers)})?"

            if not messagebox.askyesno("Confirm Generation", confirm_msg):
                return

            # Start generation in background thread
            self.selected_clips_data = [clip_data for _, clip_data in selected_clips]
            self.generate_btn.config(state="disabled", text="Generating Selected...")
            thread = threading.Thread(target=self.selected_generation_worker)
            thread.daemon = True
            thread.start()

        except Exception as e:
            messagebox.showerror("Generation Error", f"Error starting generation: {str(e)}")

    def selected_generation_worker(self):
        """Generate only selected clips"""
        try:
            # Check available disk space
            import shutil
            free_space_gb = shutil.disk_usage(self.output_folder)[2] / (1024**3)
            if free_space_gb < 1:
                raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

            # Validate video first
            try:
                video_duration = validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
                self.update_progress(f"✅ Video validated ({video_duration:.1f}s)", 5)
            except Exception as e:
                self.update_progress(f"❌ Video validation failed", 0)
                raise e

            os.makedirs(self.output_folder, exist_ok=True)

            selected_count = len(self.selected_clips_data)
            self.update_progress(f"📊 Generating {selected_count} selected clips", 10)

            for i, (start, end) in enumerate(self.selected_clips_data):
                self.update_progress(f"🗣️ Transcribing clip {i+1}/{selected_count}", 20 + (i * 30))

                subtitles = transcribe_and_extract_subtitles(self.video_path, start, end)
                out_path = os.path.join(self.output_folder, f"short_{i+1}.mp4")

                self.update_progress(f"🎬 Creating video {i+1}/{selected_count}", 40 + (i * 30))

                create_short_clip(self.video_path, start, end, subtitles, out_path)

            self.update_progress("✅ Selected clips generated successfully!", 100)
            messagebox.showinfo("Success", f"Successfully generated {selected_count} selected clips in '{self.output_folder}' folder!")

        except FileNotFoundError as e:
            messagebox.showerror("File Error", str(e))
        except ValueError as e:
            messagebox.showerror("Video Error", str(e))
        except RuntimeError as e:
            messagebox.showerror("System Error", str(e))
        except Exception as e:
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
        finally:
            self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
            self.progress_bar["value"] = 0
            self.progress_label.config(text="Ready to generate shorts")

    def update_progress(self, message, percent):
        self.progress_label.config(text=message)
        self.progress_bar["value"] = percent
        self.root.update()

    def generation_worker(self):
        try:
            # Check available disk space
            import shutil
            free_space_gb = shutil.disk_usage(self.output_folder)[2] / (1024**3)
            if free_space_gb < 1:
                raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

            generate_shorts(
                self.video_path,
                max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,  # Default max for non-loud modes
                output_folder=self.output_folder,
                progress_callback=self.update_progress,
                threshold_db=self.threshold_var.get(),
                clip_duration=self.duration_var.get(),
                detection_mode=self.detection_mode_var.get()
            )
            messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
        except FileNotFoundError as e:
            messagebox.showerror("File Error", str(e))
        except ValueError as e:
            messagebox.showerror("Video Error", str(e))
        except RuntimeError as e:
            messagebox.showerror("System Error", str(e))
        except Exception as e:
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
        finally:
            self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
            self.progress_bar["value"] = 0
            self.progress_label.config(text="Ready to generate shorts")

    def start_generation(self):
        if not self.video_path:
            messagebox.showwarning("Warning", "Please select a video file first!")
            return

        self.generate_btn.config(state="disabled", text="Generating...")
        thread = threading.Thread(target=self.generation_worker)
        thread.daemon = True
        thread.start()

def run_gui():
    root = tk.Tk()
    app = ShortsGeneratorGUI(root)
    root.mainloop()

if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
        # Run command line mode
        try:
            generate_shorts(sys.argv[1])
            print("✅ Shorts generation completed successfully!")
        except Exception as e:
            print(f"❌ Error: {str(e)}")
    else:
        # Run GUI mode (default)
        run_gui()