Update requirements and enhance shorts generator with advanced detection modes and tooltips

This commit is contained in:
klop51 2025-08-09 15:45:24 +02:00
parent 491040b148
commit bd55be0448
3 changed files with 601 additions and 36 deletions

View File

@@ -1,3 +1,6 @@
moviepy>=1.0.3
faster-whisper>=0.10.0
numpy>=1.21.0
opencv-python>=4.5.0
scipy>=1.7.0
librosa>=0.9.0

View File

@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa
class ToolTip:
"""Create a tooltip for a given widget"""
def __init__(self, widget, text='widget info', side='right'):
self.widget = widget
self.text = text
self.side = side
self.widget.bind("<Enter>", self.enter)
self.widget.bind("<Leave>", self.leave)
self.tipwindow = None
def enter(self, event=None):
self.showtip()
def leave(self, event=None):
self.hidetip()
def showtip(self):
if self.tipwindow or not self.text:
return
# Get widget position
x = self.widget.winfo_rootx()
y = self.widget.winfo_rooty()
w = self.widget.winfo_width()
h = self.widget.winfo_height()
# Position tooltip based on side preference
if self.side == 'right':
x = x + w + 10 # 10px to the right of widget
y = y
else:
x = x + 25
y = y + h + 5
self.tipwindow = tw = tk.Toplevel(self.widget)
tw.wm_overrideredirect(True)
tw.wm_geometry("+%d+%d" % (x, y))
label = tk.Label(tw, text=self.text, justify=tk.LEFT,
background="#ffffe0", relief=tk.SOLID, borderwidth=1,
font=("Arial", "9", "normal"), wraplength=350)
label.pack(ipadx=5, ipady=3)
def hidetip(self):
tw = self.tipwindow
self.tipwindow = None
if tw:
tw.destroy()
class ComboboxTooltip:
"""Special tooltip for combobox that shows on listbox hover"""
def __init__(self, combobox, descriptions):
self.combobox = combobox
self.descriptions = descriptions
self.tipwindow = None
self.bound_listbox = None
# Bind to combobox events
self.combobox.bind("<Button-1>", self.on_click)
self.combobox.bind("<KeyPress>", self.on_keypress)
def on_click(self, event):
# Try to find the listbox when dropdown opens
self.combobox.after(50, self.bind_listbox)
def on_keypress(self, event):
# Handle keyboard navigation
self.combobox.after(50, self.bind_listbox)
def bind_listbox(self):
# Find the listbox widget more reliably
try:
# Look through all toplevel windows for the combobox popdown
for window in self.combobox.winfo_toplevel().winfo_children():
window_class = window.winfo_class()
if window_class == 'Toplevel':
# Found a toplevel, look for listbox inside
for child in window.winfo_children():
if child.winfo_class() == 'Listbox':
if self.bound_listbox != child:
self.bound_listbox = child
child.bind("<Motion>", self.on_listbox_motion)
child.bind("<Leave>", self.on_listbox_leave)
child.bind("<ButtonRelease-1>", self.on_listbox_leave)
return
except Exception as e:
# Fallback method - try to find any listbox
try:
# Alternative approach: look for the popdown frame
for child in self.combobox.tk.call('winfo', 'children', '.'):
if 'popdown' in str(child):
popdown = self.combobox.nametowidget(child)
for subchild in popdown.winfo_children():
if subchild.winfo_class() == 'Listbox':
if self.bound_listbox != subchild:
self.bound_listbox = subchild
subchild.bind("<Motion>", self.on_listbox_motion)
subchild.bind("<Leave>", self.on_listbox_leave)
subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
return
except:
pass
def on_listbox_motion(self, event):
try:
listbox = event.widget
index = listbox.nearest(event.y)
if 0 <= index < len(self.combobox['values']):
selection = self.combobox['values'][index]
if selection in self.descriptions:
self.show_tooltip(event, self.descriptions[selection])
except Exception:
pass
def on_listbox_leave(self, event):
self.hide_tooltip()
def show_tooltip(self, event, text):
self.hide_tooltip() # Hide any existing tooltip
try:
x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
y = event.widget.winfo_rooty() + event.y - 20
self.tipwindow = tw = tk.Toplevel(event.widget)
tw.wm_overrideredirect(True)
tw.wm_geometry("+%d+%d" % (x, y))
label = tk.Label(tw, text=text, justify=tk.LEFT,
background="#ffffe0", relief=tk.SOLID, borderwidth=1,
font=("Arial", "9", "normal"), wraplength=350)
label.pack(ipadx=5, ipady=3)
except Exception:
pass
def hide_tooltip(self):
if self.tipwindow:
try:
self.tipwindow.destroy()
except:
pass
self.tipwindow = None
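For illustration, a minimal usage sketch of the two tooltip helpers above (the widget values and descriptions are placeholders, not part of the module). The commented lines show the ttk::combobox::PopdownWindow Tcl call, which on most Tk builds exposes the dropdown's internal listbox and could replace the child-scanning in bind_listbox; treat the ".f.l" path as an assumption to verify on your Tk version.

import tkinter as tk
from tkinter import ttk

root = tk.Tk()
combo = ttk.Combobox(root, values=["Loud Moments", "Scene Changes"], state="readonly")
combo.pack(padx=10, pady=10)

ToolTip(combo, "Pick how clips are detected", side='right')       # plain hover tooltip
ComboboxTooltip(combo, {"Loud Moments": "Volume-based detection",
                        "Scene Changes": "Visual-cut detection"})  # per-item dropdown tooltips

# Possible shortcut for bind_listbox (assumed internal Tk path, verify before relying on it):
# popdown = combo.tk.call('ttk::combobox::PopdownWindow', combo)
# listbox = combo.nametowidget(str(popdown) + '.f.l')

root.mainloop()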
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
clip.close()
return loud_chunks
def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
"""Detect dramatic visual scene changes"""
print("🎬 Analyzing scene changes...")
clip = VideoFileClip(video_path)
# Sample frames at regular intervals
sample_rate = 2 # Check every 2 seconds
times = np.arange(0, clip.duration, sample_rate)
scene_changes = []
prev_frame = None
for i, t in enumerate(times[:-1]):
try:
# Get current and next frame
frame1 = clip.get_frame(t)
frame2 = clip.get_frame(times[i + 1])
# Convert to grayscale and resize for faster processing
gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
gray1 = cv2.resize(gray1, (160, 90)) # Small size for speed
gray2 = cv2.resize(gray2, (160, 90))
# Calculate mean absolute pixel difference between frames (0-1 scale)
diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0
if diff > threshold:
start = max(0, t - chunk_duration/2)
end = min(clip.duration, t + chunk_duration/2)
scene_changes.append((start, end))
except Exception as e:
print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
continue
print(f"🎬 Found {len(scene_changes)} scene changes")
clip.close()
return scene_changes
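For reference, a quick sanity check of the difference metric used above: it is a mean absolute pixel difference normalized to 0-1 (not true structural similarity), so two downscaled frames whose gray levels differ by about 80 on average land just over the default 0.3 threshold. The arrays below are synthetic stand-ins, not frames from a real clip.

import numpy as np

frame_a = np.full((90, 160), 100, dtype=np.uint8)  # mid-gray stand-in frame
frame_b = np.full((90, 160), 180, dtype=np.uint8)  # much brighter stand-in frame

diff = np.mean(np.abs(frame_a.astype(float) - frame_b.astype(float))) / 255.0
print(round(diff, 3))  # 0.314 -> above 0.3, so this pair would be recorded as a scene change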
def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
"""Detect high motion/action moments"""
print("🏃 Analyzing motion intensity...")
clip = VideoFileClip(video_path)
sample_rate = 1 # Check every second
times = np.arange(0, clip.duration - 1, sample_rate)
motion_moments = []
for t in times:
try:
# Get two consecutive frames
frame1 = clip.get_frame(t)
frame2 = clip.get_frame(t + 0.5) # Half second later
# Convert to grayscale and resize
gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
gray1 = cv2.resize(gray1, (160, 90))
gray2 = cv2.resize(gray2, (160, 90))
# Calculate optical flow magnitude
flow = cv2.calcOpticalFlowPyrLK(gray1, gray2,
np.random.randint(0, 160, (100, 1, 2)).astype(np.float32),
None)[0]
if flow is not None:
motion_magnitude = np.mean(np.linalg.norm(flow.reshape(-1, 2), axis=1))
if motion_magnitude > threshold:
start = max(0, t - chunk_duration/2)
end = min(clip.duration, t + chunk_duration/2)
motion_moments.append((start, end))
except Exception as e:
print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
continue
print(f"🏃 Found {len(motion_moments)} high-motion moments")
clip.close()
return motion_moments
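One caveat worth noting: cv2.calcOpticalFlowPyrLK is a sparse tracker that expects feature points (usually from cv2.goodFeaturesToTrack) and returns the tracked point positions, so taking the norm of its output measures where the random points sit rather than how far anything moved. A denser alternative, sketched here as an assumption rather than what this module ships, is Farneback flow, whose output is a per-pixel displacement field:

import cv2
import numpy as np

def frame_motion(gray1, gray2):
    """Mean optical-flow magnitude between two same-sized grayscale frames."""
    flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None,
                                        pyr_scale=0.5, levels=3, winsize=15,
                                        iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
    # flow has shape (H, W, 2) holding per-pixel (dx, dy) displacements
    return float(np.mean(np.linalg.norm(flow, axis=2)))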
def detect_speech_emotion(video_path, chunk_duration=5):
"""Detect emotional/excited speech patterns"""
print("😄 Analyzing speech emotions...")
# Use Whisper to get detailed speech analysis
model = WhisperModel("base", device="cpu", compute_type="int8")
segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)
emotional_moments = []
excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']
for segment in segments:
text = segment.text.lower()
# Check for excitement keywords
has_keywords = any(keyword in text for keyword in excitement_keywords)
# Check for multiple exclamation-worthy patterns
has_caps = any(word.isupper() for word in segment.text.split())
has_punctuation = '!' in segment.text or '?' in segment.text
is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)
if has_keywords or has_punctuation or is_short_excited:
start = max(0, segment.start - chunk_duration/2)
end = min(segment.end + chunk_duration/2, segment.end + chunk_duration)
emotional_moments.append((start, end))
print(f"😄 Found {len(emotional_moments)} emotional speech moments")
return emotional_moments
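The speech heuristic above keeps a segment if it contains an excitement keyword, carries ! or ? punctuation, or is a short excited burst; a standalone re-run of just that text check (simplified, using a subset of the keyword list) shows which lines survive:

excitement_keywords = ['wow', 'amazing', 'incredible', 'no way', 'crazy', 'insane', 'yes']

def is_emotional(text):
    lowered = text.lower()
    has_keywords = any(k in lowered for k in excitement_keywords)
    has_punctuation = '!' in text or '?' in text
    is_short_excited = len(lowered.split()) <= 5 and has_keywords
    return has_keywords or has_punctuation or is_short_excited

print(is_emotional("That was absolutely amazing"))     # True  (keyword)
print(is_emotional("Did you see that?"))               # True  (punctuation)
print(is_emotional("We walk to the next checkpoint"))  # False (nothing matches)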
def detect_audio_peaks(video_path, chunk_duration=5):
"""Detect sudden audio peaks (bass drops, beats, impacts)"""
print("🎵 Analyzing audio peaks...")
clip = VideoFileClip(video_path)
audio = clip.audio.to_soundarray(fps=22050) # Lower sample rate for speed
# Convert to mono if stereo
if len(audio.shape) > 1:
audio = np.mean(audio, axis=1)
# Find amplitude peaks (sudden loud samples such as beats or impacts)
peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))
peak_moments = []
prev_peak = 0
for peak in peaks:
peak_time = peak / 22050
# Avoid too close peaks
if peak_time - prev_peak > chunk_duration:
start = max(0, peak_time - chunk_duration/2)
end = min(clip.duration, peak_time + chunk_duration/2)
peak_moments.append((start, end))
prev_peak = peak_time
print(f"🎵 Found {len(peak_moments)} audio peak moments")
clip.close()
return peak_moments
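scipy's find_peaks on the raw waveform flags individual loud samples above the 95th percentile; since librosa is now listed in the requirements, an onset-strength based variant is another option. A sketch only, assuming audio is the mono float array built above at 22050 Hz:

import numpy as np
import librosa

def onset_peak_times(audio, sr=22050, min_gap=5.0):
    """Onset times (in seconds) spaced at least min_gap apart."""
    onsets = librosa.onset.onset_detect(y=audio.astype(np.float32), sr=sr, units='time')
    spaced, prev = [], -min_gap
    for t in onsets:
        if t - prev >= min_gap:
            spaced.append(float(t))
            prev = t
    return spaced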
def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
"""Combine multiple detection methods for best moments"""
print("🎯 Running comprehensive moment analysis...")
if weights is None:
weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}
# Get all detection results
loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5) # Lower threshold
scene_moments = detect_scene_changes(video_path, chunk_duration)
motion_moments = detect_motion_intensity(video_path, chunk_duration)
speech_moments = detect_speech_emotion(video_path, chunk_duration)
peak_moments = detect_audio_peaks(video_path, chunk_duration)
# Create time-based scoring
clip = VideoFileClip(video_path)
duration = clip.duration
clip.close()
# Score each second of the video
time_scores = {}
for moments, weight in [(loud_moments, weights['loud']),
(scene_moments, weights['scene']),
(motion_moments, weights['motion']),
(speech_moments, weights['speech']),
(peak_moments, weights['peaks'])]:
for start, end in moments:
for t in range(int(start), int(end) + 1):
if t not in time_scores:
time_scores[t] = 0
time_scores[t] += weight
# Find the highest scoring segments
if not time_scores:
return loud_moments # Fallback to loud moments
# Get top scoring time periods
sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
combined_moments = []
used_times = set()
for time_sec, score in sorted_times:
if time_sec not in used_times and score > 0.3: # Minimum threshold
start = max(0, time_sec - chunk_duration/2)
end = min(duration, time_sec + chunk_duration/2)
combined_moments.append((start, end))
# Mark nearby times as used to avoid overlap
for t in range(max(0, time_sec - chunk_duration),
min(int(duration), time_sec + chunk_duration)):
used_times.add(t)
print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
return combined_moments
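To make the scoring concrete: each second gets the sum of the weights of every detector whose window covers it, and only seconds scoring above 0.3 become clip candidates. A small worked example with the default weights:

weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

# Second 42 is covered by the loud, motion and speech windows:
score_42 = weights['loud'] + weights['motion'] + weights['speech']  # 0.7 -> kept (> 0.3)

# Second 97 is covered only by an audio-peak window:
score_97 = weights['peaks']                                         # 0.1 -> discarded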
def transcribe_and_extract_subtitles(video_path, start, end):
print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
model = WhisperModel("base", device="cpu", compute_type="int8")
@@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30):
else:
raise ValueError(f"Error reading video: {str(e)}")
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
threshold_db=-30, clip_duration=5, detection_mode="loud"):
os.makedirs(output_folder, exist_ok=True)
# Validate video first
@@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
progress_callback(f"❌ Video validation failed", 0)
raise e
# Choose detection method based on mode
if detection_mode == "loud":
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
elif detection_mode == "scene":
if progress_callback:
progress_callback("🎬 Analyzing scene changes...", 10)
best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
elif detection_mode == "motion":
if progress_callback:
progress_callback("🏃 Analyzing motion intensity...", 10)
best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
elif detection_mode == "speech":
if progress_callback:
progress_callback("😄 Analyzing speech emotions...", 10)
best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
elif detection_mode == "peaks":
if progress_callback:
progress_callback("🎵 Analyzing audio peaks...", 10)
best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
elif detection_mode == "combined":
if progress_callback:
progress_callback("🎯 Running comprehensive analysis...", 10)
best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
else:
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
selected = best_moments[:max_clips]
if not selected:
mode_name = {
"loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
"speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
}.get(detection_mode, "moments")
raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")
if progress_callback:
progress_callback(f"📊 Found {len(selected)} clips to generate", 20)
@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
class ShortsGeneratorGUI:
def __init__(self, root):
self.root = root
self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
self.root.geometry("650x650") # Reduced height to eliminate empty space
self.root.minsize(600, 600) # Reduced minimum size
self.video_path = None
self.output_folder = "shorts"
@ -265,43 +641,202 @@ class ShortsGeneratorGUI:
self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
clips_checkbox.config(command=toggle_clips_limit)
# Add tooltip for max clips setting
clips_tooltip_text = """Max Clips Control:
Checked: Limit the number of clips generated
Unchecked: Generate all detected moments
1-3 clips: Quick highlights for social media
4-6 clips: Good variety pack
7-10 clips: Comprehensive highlight reel
Tip: Start with 3 clips, then increase if you want more content"""
ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
ToolTip(clips_checkbox, clips_tooltip_text, side='right')
# Detection Mode Selection
detection_frame = tk.Frame(settings_frame)
detection_frame.pack(fill="x", pady=5)
tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")
self.detection_mode_var = tk.StringVar(value="loud")
self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
"😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
state="readonly", width=22)
detection_dropdown.pack(side="right")
# Store the mapping between display text and internal values
self.mode_mapping = {
"🔊 Loud Moments": "loud",
"🎬 Scene Changes": "scene",
"🏃 Motion Intensity": "motion",
"😄 Emotional Speech": "speech",
"🎵 Audio Peaks": "peaks",
"🎯 Smart Combined": "combined"
}
# Simple, clear descriptions for mode tooltips
mode_descriptions = {
"🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
Best for: Gaming reactions, music highlights, shouting moments
Finds: High-volume audio segments above the threshold
Ideal when: Your video has clear volume differences
Tip: Adjust threshold if too many/few moments found""",
"🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
Best for: Movie trailers, montages, location changes
Finds: Major visual shifts between frames
Ideal when: Video has multiple scenes or camera angles
Tip: Great for content with quick cuts or transitions""",
"🏃 Motion Intensity": """Analyzes movement and action within video frames.
Best for: Sports highlights, dance videos, action scenes
Finds: High-movement moments with lots of visual activity
Ideal when: Video contains physical action or movement
Tip: Perfect for extracting the most dynamic moments""",
"😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
Best for: Reactions, reviews, commentary, tutorials
Finds: Words like 'wow', 'amazing', exclamations, excited tone
Ideal when: Video has spoken content with emotional moments
Tip: Captures the most engaging verbal reactions""",
"🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
Best for: Music videos, sound effect moments, beat drops
Finds: Sharp increases in audio frequency or volume
Ideal when: Video has musical elements or sound effects
Tip: Great for rhythm-based or audio-driven content""",
"🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
Best for: Any video type, general content, unsure what to use
Finds: Moments scoring high across multiple analysis methods
Ideal when: You want the most 'interesting' overall moments
Tip: Recommended starting point for most videos"""
}
# Create tooltip for the dropdown (updates when selection changes)
current_tooltip_text = mode_descriptions["🔊 Loud Moments"] # Default
dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)
# Update tooltip when selection changes
def on_detection_change(event):
selection = detection_dropdown.get()
mode_map = {
"🔊 Loud Moments": "loud",
"🎬 Scene Changes": "scene",
"🏃 Motion Intensity": "motion",
"😄 Emotional Speech": "speech",
"🎵 Audio Peaks": "peaks",
"🎯 Smart Combined": "combined"
}
self.detection_mode_var.set(mode_map.get(selection, "loud"))
# Update tooltip text for the selected mode
dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
# Show/hide threshold setting based on mode
if selection == "🔊 Loud Moments":
threshold_frame.pack(fill="x", pady=5)
else:
threshold_frame.pack_forget()
detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
# Audio threshold (only shown for loud moments)
threshold_frame = tk.Frame(settings_frame)
threshold_frame.pack(fill="x", pady=5)
threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
threshold_label.pack(side="left")
self.threshold_var = tk.IntVar(value=-30)
threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
threshold_spinbox.pack(side="right")
# Add tooltip for threshold setting
threshold_tooltip_text = """Audio Threshold Control:
Higher values (closer to 0): Only very loud moments
Lower values (closer to -50): More moments detected
Default -30 dB: Good balance for most videos
Adjust based on your video's audio levels
Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
# Clip duration (increased to 120 seconds max)
duration_frame = tk.Frame(settings_frame)
duration_frame.pack(fill="x", pady=5)
duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
duration_label.pack(side="left")
self.duration_var = tk.IntVar(value=5)
duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
duration_spinbox.pack(side="right")
# Add tooltip for duration setting
duration_tooltip_text = """Clip Duration Setting:
3-10 seconds: Perfect for TikTok/Instagram Reels
10-30 seconds: Good for YouTube Shorts
30-60 seconds: Longer form highlights
60+ seconds: Extended content clips
Shorter clips = more viral potential
Longer clips = more context and story"""
ToolTip(duration_spinbox, duration_tooltip_text, side='right')
# Preview button
self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
command=self.preview_clips, bg="#2196F3", fg="white",
font=("Arial", 10, "bold"), pady=5)
self.preview_btn.pack(pady=5)
# Add tooltip for preview button
preview_tooltip_text = """Preview Clips Feature:
Analyzes your video using the selected detection mode
Shows all detected moments with timestamps
Lets you select specific clips to generate
No video files created - just analysis
Great for testing settings before full generation
Tip: Always preview first to see what the AI finds!"""
ToolTip(self.preview_btn, preview_tooltip_text, side='right')
# Generate button
self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
command=self.start_generation, bg="#4CAF50", fg="white",
font=("Arial", 12, "bold"), pady=10)
self.generate_btn.pack(pady=10)
# Add tooltip for generate button
generate_tooltip_text = """Generate Shorts Feature:
Creates actual video files from detected moments
Adds AI-generated subtitles to each clip
Formats videos for vertical social media (1080x1920)
Saves clips to your selected output folder
Takes longer but creates ready-to-post content
Tip: Use Preview first to fine-tune your settings!"""
ToolTip(self.generate_btn, generate_tooltip_text, side='right')
# Progress frame
progress_frame = tk.Frame(self.root)
progress_frame.pack(pady=5, padx=20, fill="x")
self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
self.progress_label.pack()
self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
self.progress_bar.pack(pady=3)
def select_video(self):
file_path = filedialog.askopenfilename(
@ -327,18 +862,44 @@ class ShortsGeneratorGUI:
# Validate video first
validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
# Analyze using selected detection mode
self.preview_btn.config(state="disabled", text="Analyzing...")
self.root.update()
detection_mode = self.detection_mode_var.get()
if detection_mode == "loud":
moments = detect_loud_moments(
self.video_path,
chunk_duration=self.duration_var.get(),
threshold_db=self.threshold_var.get()
)
mode_name = "loud moments"
elif detection_mode == "scene":
moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "scene changes"
elif detection_mode == "motion":
moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "motion moments"
elif detection_mode == "speech":
moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "emotional speech"
elif detection_mode == "peaks":
moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "audio peaks"
elif detection_mode == "combined":
moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "interesting moments"
else:
moments = detect_loud_moments(
self.video_path,
chunk_duration=self.duration_var.get(),
threshold_db=self.threshold_var.get()
)
mode_name = "loud moments"
if not moments:
messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
return
# Show preview window
@@ -346,7 +907,7 @@
preview_window.title("Preview and Select Clips") preview_window.title("Preview and Select Clips")
preview_window.geometry("500x400") preview_window.geometry("500x400")
tk.Label(preview_window, text=f"Found {len(loud_moments)} loud moments:", font=("Arial", 12, "bold")).pack(pady=10) tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)
# Create scrollable frame for checkboxes # Create scrollable frame for checkboxes
canvas = tk.Canvas(preview_window) canvas = tk.Canvas(preview_window)
@@ -364,7 +925,7 @@
# Store checkbox variables and clip data
self.clip_vars = []
# Use all clips if max clips is disabled, otherwise limit by setting
clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
self.preview_clips_data = clips_to_show
# Add selectable clips with checkboxes
@@ -510,11 +1071,12 @@
generate_shorts(
self.video_path,
max_clips=self.clips_var.get() if self.use_max_clips.get() else 10, # Default max for non-loud modes
output_folder=self.output_folder,
progress_callback=self.update_progress,
threshold_db=self.threshold_var.get(),
clip_duration=self.duration_var.get(),
detection_mode=self.detection_mode_var.get()
)
messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
except FileNotFoundError as e:
@@ -547,18 +1109,14 @@ def run_gui():
if __name__ == "__main__":
import sys
if len(sys.argv) > 1 and sys.argv[1] != "--gui":
# Run command line mode
try:
generate_shorts(sys.argv[1])
print("✅ Shorts generation completed successfully!")
except Exception as e:
print(f"❌ Error: {str(e)}")
else:
# Run GUI mode (default)
run_gui()

View File

@@ -1,4 +1,8 @@
1
00:00:00,000 --> 00:00:01,280
Yeah! Yeah!
2
00:00:06,000 --> 00:00:07,809
FLAVOR CHEESE 24!