Update requirements and enhance shorts generator with advanced detection modes and tooltips

klop51 2025-08-09 15:45:24 +02:00
parent 491040b148
commit bd55be0448
3 changed files with 601 additions and 36 deletions

View File

@@ -1,3 +1,6 @@
moviepy>=1.0.3
faster-whisper>=0.10.0
numpy>=1.21.0
opencv-python>=4.5.0
scipy>=1.7.0
librosa>=0.9.0
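# opencv-python and scipy back the new scene/motion/peak detectors;
# librosa is also imported by shorts_generator2.py.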

View File

@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa
class ToolTip:
    """Create a tooltip for a given widget"""
    def __init__(self, widget, text='widget info', side='right'):
        self.widget = widget
        self.text = text
        self.side = side
        self.widget.bind("<Enter>", self.enter)
        self.widget.bind("<Leave>", self.leave)
        self.tipwindow = None

    def enter(self, event=None):
        self.showtip()

    def leave(self, event=None):
        self.hidetip()

    def showtip(self):
        if self.tipwindow or not self.text:
            return
        # Get widget position
        x = self.widget.winfo_rootx()
        y = self.widget.winfo_rooty()
        w = self.widget.winfo_width()
        h = self.widget.winfo_height()

        # Position tooltip based on side preference
        if self.side == 'right':
            x = x + w + 10  # 10px to the right of the widget
            y = y
        else:
            x = x + 25
            y = y + h + 5

        self.tipwindow = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)
        tw.wm_geometry("+%d+%d" % (x, y))
        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
                         background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                         font=("Arial", "9", "normal"), wraplength=350)
        label.pack(ipadx=5, ipady=3)

    def hidetip(self):
        tw = self.tipwindow
        self.tipwindow = None
        if tw:
            tw.destroy()
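# Usage sketch (hypothetical widget names):
#   btn = tk.Button(root, text="Export")
#   ToolTip(btn, "Render the selected clips", side='right')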
class ComboboxTooltip:
    """Special tooltip for combobox that shows on listbox hover"""
    def __init__(self, combobox, descriptions):
        self.combobox = combobox
        self.descriptions = descriptions
        self.tipwindow = None
        self.bound_listbox = None
        # Bind to combobox events
        self.combobox.bind("<Button-1>", self.on_click)
        self.combobox.bind("<KeyPress>", self.on_keypress)

    def on_click(self, event):
        # Try to find the listbox when the dropdown opens
        self.combobox.after(50, self.bind_listbox)

    def on_keypress(self, event):
        # Handle keyboard navigation
        self.combobox.after(50, self.bind_listbox)

    def bind_listbox(self):
        # Find the listbox widget more reliably
        try:
            # Look through all toplevel windows for the combobox popdown
            for window in self.combobox.winfo_toplevel().winfo_children():
                window_class = window.winfo_class()
                if window_class == 'Toplevel':
                    # Found a toplevel, look for a listbox inside
                    for child in window.winfo_children():
                        if child.winfo_class() == 'Listbox':
                            if self.bound_listbox != child:
                                self.bound_listbox = child
                                child.bind("<Motion>", self.on_listbox_motion)
                                child.bind("<Leave>", self.on_listbox_leave)
                                child.bind("<ButtonRelease-1>", self.on_listbox_leave)
                            return
        except Exception:
            # Fallback method - try to find any listbox
            try:
                # Alternative approach: look for the popdown frame
                for child in self.combobox.tk.call('winfo', 'children', '.'):
                    if 'popdown' in str(child):
                        popdown = self.combobox.nametowidget(child)
                        for subchild in popdown.winfo_children():
                            if subchild.winfo_class() == 'Listbox':
                                if self.bound_listbox != subchild:
                                    self.bound_listbox = subchild
                                    subchild.bind("<Motion>", self.on_listbox_motion)
                                    subchild.bind("<Leave>", self.on_listbox_leave)
                                    subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
                                return
            except Exception:
                pass

    def on_listbox_motion(self, event):
        try:
            listbox = event.widget
            index = listbox.nearest(event.y)
            if 0 <= index < len(self.combobox['values']):
                selection = self.combobox['values'][index]
                if selection in self.descriptions:
                    self.show_tooltip(event, self.descriptions[selection])
        except Exception:
            pass

    def on_listbox_leave(self, event):
        self.hide_tooltip()

    def show_tooltip(self, event, text):
        self.hide_tooltip()  # Hide any existing tooltip
        try:
            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
            y = event.widget.winfo_rooty() + event.y - 20

            self.tipwindow = tw = tk.Toplevel(event.widget)
            tw.wm_overrideredirect(True)
            tw.wm_geometry("+%d+%d" % (x, y))
            label = tk.Label(tw, text=text, justify=tk.LEFT,
                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                             font=("Arial", "9", "normal"), wraplength=350)
            label.pack(ipadx=5, ipady=3)
        except Exception:
            pass

    def hide_tooltip(self):
        if self.tipwindow:
            try:
                self.tipwindow.destroy()
            except Exception:
                pass
            self.tipwindow = None
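# Usage sketch (hypothetical values): give each dropdown entry its own hover text:
#   combo = ttk.Combobox(root, values=["Fast", "Accurate"], state="readonly")
#   ComboboxTooltip(combo, {"Fast": "Lower quality, quick", "Accurate": "Slower, better"})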
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    clip.close()
    return loud_chunks
def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
    """Detect dramatic visual scene changes"""
    print("🎬 Analyzing scene changes...")
    clip = VideoFileClip(video_path)

    # Sample frames at regular intervals
    sample_rate = 2  # Check every 2 seconds
    times = np.arange(0, clip.duration, sample_rate)
    scene_changes = []

    for i, t in enumerate(times[:-1]):
        try:
            # Get current and next frame
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(times[i + 1])

            # Convert to grayscale and resize for faster processing
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
            gray2 = cv2.resize(gray2, (160, 90))

            # Mean absolute pixel difference, normalized to 0-1
            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0

            if diff > threshold:
                start = max(0, t - chunk_duration / 2)
                end = min(clip.duration, t + chunk_duration / 2)
                scene_changes.append((start, end))
        except Exception as e:
            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
            continue

    print(f"🎬 Found {len(scene_changes)} scene changes")
    clip.close()
    return scene_changes
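# Note: mean absolute pixel difference is a cheap stand-in for real
# shot-boundary detection; the 0.3 default means sampled frames must differ
# by about 30% of full brightness on average to count as a cut.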
def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
    """Detect high motion/action moments"""
    print("🏃 Analyzing motion intensity...")
    clip = VideoFileClip(video_path)
    sample_rate = 1  # Check every second
    times = np.arange(0, clip.duration - 1, sample_rate)
    motion_moments = []

    for t in times:
        try:
            # Get two frames half a second apart
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(t + 0.5)

            # Convert to grayscale and resize
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))
            gray2 = cv2.resize(gray2, (160, 90))

            # Track corner features between the frames (Lucas-Kanade optical
            # flow) and measure their average displacement in pixels
            points = cv2.goodFeaturesToTrack(gray1, maxCorners=100,
                                             qualityLevel=0.3, minDistance=7)
            if points is None:
                continue
            new_points, status, _ = cv2.calcOpticalFlowPyrLK(gray1, gray2, points, None)
            if new_points is not None and status is not None:
                tracked = status.flatten() == 1
                if not np.any(tracked):
                    continue
                displacement = (new_points - points)[tracked]
                motion_magnitude = np.mean(np.linalg.norm(displacement.reshape(-1, 2), axis=1))
                if motion_magnitude > threshold:
                    start = max(0, t - chunk_duration / 2)
                    end = min(clip.duration, t + chunk_duration / 2)
                    motion_moments.append((start, end))
        except Exception as e:
            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
            continue

    print(f"🏃 Found {len(motion_moments)} high-motion moments")
    clip.close()
    return motion_moments
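# Note: the threshold is average feature displacement in pixels on the
# 160x90 downscaled frames; values below ~1 px sit near the tracker's noise
# floor, so raise it if nearly every second registers as motion.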
def detect_speech_emotion(video_path, chunk_duration=5):
    """Detect emotional/excited speech patterns"""
    print("😄 Analyzing speech emotions...")

    # Use Whisper to get detailed speech analysis
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)

    emotional_moments = []
    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
                           'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']

    for segment in segments:
        text = segment.text.lower()

        # Check for excitement keywords
        has_keywords = any(keyword in text for keyword in excitement_keywords)

        # Check for multiple exclamation-worthy patterns
        has_caps = any(word.isupper() for word in segment.text.split())
        has_punctuation = '!' in segment.text or '?' in segment.text
        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)

        if has_keywords or has_punctuation or is_short_excited:
            start = max(0, segment.start - chunk_duration / 2)
            end = segment.end + chunk_duration / 2  # pad the tail of the segment
            emotional_moments.append((start, end))

    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
    return emotional_moments
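# Worked example: a segment like "No way, that was AMAZING!" matches on a
# keyword ('no way'), punctuation ('!') and capitalization, so the clip
# window around it is kept.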
def detect_audio_peaks(video_path, chunk_duration=5):
    """Detect sudden audio peaks (bass drops, beats, impacts)"""
    print("🎵 Analyzing audio peaks...")
    clip = VideoFileClip(video_path)
    if clip.audio is None:
        clip.close()
        return []
    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed

    # Convert to mono if stereo
    if len(audio.shape) > 1:
        audio = np.mean(audio, axis=1)

    # Keep only amplitude peaks in the loudest ~5% of samples
    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))

    peak_moments = []
    prev_peak = 0
    for peak in peaks:
        peak_time = peak / 22050
        # Skip peaks that are too close to the previous accepted one
        if peak_time - prev_peak > chunk_duration:
            start = max(0, peak_time - chunk_duration / 2)
            end = min(clip.duration, peak_time + chunk_duration / 2)
            peak_moments.append((start, end))
            prev_peak = peak_time

    print(f"🎵 Found {len(peak_moments)} audio peak moments")
    clip.close()
    return peak_moments
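# Note: with height at the 95th percentile, find_peaks returns many raw
# candidates; the chunk_duration spacing rule above keeps roughly one
# moment per burst of loud audio.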
def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
    """Combine multiple detection methods for best moments"""
    print("🎯 Running comprehensive moment analysis...")
    if weights is None:
        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

    # Get all detection results
    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
    scene_moments = detect_scene_changes(video_path, chunk_duration)
    motion_moments = detect_motion_intensity(video_path, chunk_duration)
    speech_moments = detect_speech_emotion(video_path, chunk_duration)
    peak_moments = detect_audio_peaks(video_path, chunk_duration)

    # Create time-based scoring
    clip = VideoFileClip(video_path)
    duration = clip.duration
    clip.close()

    # Score each second of the video
    time_scores = {}
    for moments, weight in [(loud_moments, weights['loud']),
                            (scene_moments, weights['scene']),
                            (motion_moments, weights['motion']),
                            (speech_moments, weights['speech']),
                            (peak_moments, weights['peaks'])]:
        for start, end in moments:
            for t in range(int(start), int(end) + 1):
                if t not in time_scores:
                    time_scores[t] = 0
                time_scores[t] += weight

    # Find the highest scoring segments
    if not time_scores:
        return loud_moments  # Fallback to loud moments

    # Get top scoring time periods
    sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
    combined_moments = []
    used_times = set()

    for time_sec, score in sorted_times:
        if time_sec not in used_times and score > 0.3:  # Minimum combined score
            start = max(0, time_sec - chunk_duration / 2)
            end = min(duration, time_sec + chunk_duration / 2)
            combined_moments.append((start, end))
            # Mark nearby times as used to avoid overlap
            for t in range(max(0, time_sec - chunk_duration),
                           min(int(duration), time_sec + chunk_duration)):
                used_times.add(t)

    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
    return combined_moments
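# Worked example with the default weights: a second flagged by both the
# loud (0.3) and motion (0.2) detectors scores 0.5 and clears the 0.3
# cutoff; a second flagged only by audio peaks (0.1) is discarded.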
def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
@@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30):
    else:
        raise ValueError(f"Error reading video: {str(e)}")
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
                    threshold_db=-30, clip_duration=5, detection_mode="loud"):
    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
@@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
progress_callback(f"❌ Video validation failed", 0)
raise e
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
# Choose detection method based on mode
if detection_mode == "loud":
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
elif detection_mode == "scene":
if progress_callback:
progress_callback("🎬 Analyzing scene changes...", 10)
best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
elif detection_mode == "motion":
if progress_callback:
progress_callback("🏃 Analyzing motion intensity...", 10)
best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
elif detection_mode == "speech":
if progress_callback:
progress_callback("😄 Analyzing speech emotions...", 10)
best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
elif detection_mode == "peaks":
if progress_callback:
progress_callback("🎵 Analyzing audio peaks...", 10)
best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
elif detection_mode == "combined":
if progress_callback:
progress_callback("🎯 Running comprehensive analysis...", 10)
best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
else:
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
    selected = best_moments[:max_clips]

    if not selected:
        mode_name = {
            "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
            "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
        }.get(detection_mode, "moments")
        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")

    if progress_callback:
        progress_callback(f"📊 Found {len(selected)} clips to generate", 20)
@@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
class ShortsGeneratorGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
        self.root.geometry("650x650")  # Reduced height to eliminate empty space
        self.root.minsize(600, 600)  # Reduced minimum size

        self.video_path = None
        self.output_folder = "shorts"
@@ -265,43 +641,202 @@ class ShortsGeneratorGUI:
        self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
        clips_checkbox.config(command=toggle_clips_limit)

        # Add tooltip for max clips setting
        clips_tooltip_text = """Max Clips Control:
Checked: Limit the number of clips generated
Unchecked: Generate all detected moments
1-3 clips: Quick highlights for social media
4-6 clips: Good variety pack
7-10 clips: Comprehensive highlight reel
Tip: Start with 3 clips, then increase if you want more content"""
        ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
        ToolTip(clips_checkbox, clips_tooltip_text, side='right')
        # Detection Mode Selection
        detection_frame = tk.Frame(settings_frame)
        detection_frame.pack(fill="x", pady=5)
        tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")

        self.detection_mode_var = tk.StringVar(value="loud")
        self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
        detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
                                          values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
                                                  "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
                                          state="readonly", width=22)
        detection_dropdown.pack(side="right")

        # Store the mapping between display text and internal values
        self.mode_mapping = {
            "🔊 Loud Moments": "loud",
            "🎬 Scene Changes": "scene",
            "🏃 Motion Intensity": "motion",
            "😄 Emotional Speech": "speech",
            "🎵 Audio Peaks": "peaks",
            "🎯 Smart Combined": "combined"
        }
        # Simple, clear descriptions for mode tooltips
        mode_descriptions = {
            "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
Best for: Gaming reactions, music highlights, shouting moments
Finds: High-volume audio segments above the threshold
Ideal when: Your video has clear volume differences
Tip: Adjust threshold if too many/few moments found""",
            "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
Best for: Movie trailers, montages, location changes
Finds: Major visual shifts between frames
Ideal when: Video has multiple scenes or camera angles
Tip: Great for content with quick cuts or transitions""",
            "🏃 Motion Intensity": """Analyzes movement and action within video frames.
Best for: Sports highlights, dance videos, action scenes
Finds: High-movement moments with lots of visual activity
Ideal when: Video contains physical action or movement
Tip: Perfect for extracting the most dynamic moments""",
            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
Best for: Reactions, reviews, commentary, tutorials
Finds: Words like 'wow', 'amazing', exclamations, excited tone
Ideal when: Video has spoken content with emotional moments
Tip: Captures the most engaging verbal reactions""",
            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
Best for: Music videos, sound effect moments, beat drops
Finds: Sharp increases in audio frequency or volume
Ideal when: Video has musical elements or sound effects
Tip: Great for rhythm-based or audio-driven content""",
            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
Best for: Any video type, general content, unsure what to use
Finds: Moments scoring high across multiple analysis methods
Ideal when: You want the most 'interesting' overall moments
Tip: Recommended starting point for most videos"""
        }
        # Create tooltip for the dropdown (updates when selection changes)
        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)

        # Update tooltip when selection changes
        def on_detection_change(event):
            selection = detection_dropdown.get()
            # Reuse the display-text to internal-value mapping defined above
            self.detection_mode_var.set(self.mode_mapping.get(selection, "loud"))
            # Update tooltip text for the selected mode
            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
            # Show/hide threshold setting based on mode
            if selection == "🔊 Loud Moments":
                threshold_frame.pack(fill="x", pady=5)
            else:
                threshold_frame.pack_forget()

        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
        # Audio threshold (only shown for loud moments)
        threshold_frame = tk.Frame(settings_frame)
        threshold_frame.pack(fill="x", pady=5)
        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
        threshold_label.pack(side="left")
        self.threshold_var = tk.IntVar(value=-30)
        threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
        threshold_spinbox.pack(side="right")

        # Add tooltip for threshold setting
        threshold_tooltip_text = """Audio Threshold Control:
Higher values (closer to 0): Only very loud moments
Lower values (closer to -50): More moments detected
Default -30 dB: Good balance for most videos
Adjust based on your video's audio levels
Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
        # Clip duration (increased to 120 seconds max)
        duration_frame = tk.Frame(settings_frame)
        duration_frame.pack(fill="x", pady=5)
        duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
        duration_label.pack(side="left")
        self.duration_var = tk.IntVar(value=5)
        duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
        duration_spinbox.pack(side="right")

        # Add tooltip for duration setting
        duration_tooltip_text = """Clip Duration Setting:
3-10 seconds: Perfect for TikTok/Instagram Reels
10-30 seconds: Good for YouTube Shorts
30-60 seconds: Longer form highlights
60+ seconds: Extended content clips
Shorter clips = more viral potential
Longer clips = more context and story"""
        ToolTip(duration_spinbox, duration_tooltip_text, side='right')
        # Preview button
        self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                     command=self.preview_clips, bg="#2196F3", fg="white",
                                     font=("Arial", 10, "bold"), pady=5)
        self.preview_btn.pack(pady=5)

        # Add tooltip for preview button
        preview_tooltip_text = """Preview Clips Feature:
Analyzes your video using the selected detection mode
Shows all detected moments with timestamps
Lets you select specific clips to generate
No video files created - just analysis
Great for testing settings before full generation
Tip: Always preview first to see what the AI finds!"""
        ToolTip(self.preview_btn, preview_tooltip_text, side='right')
        # Generate button
        self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                      command=self.start_generation, bg="#4CAF50", fg="white",
                                      font=("Arial", 12, "bold"), pady=10)
        self.generate_btn.pack(pady=10)

        # Add tooltip for generate button
        generate_tooltip_text = """Generate Shorts Feature:
Creates actual video files from detected moments
Adds AI-generated subtitles to each clip
Formats videos for vertical social media (1080x1920)
Saves clips to your selected output folder
Takes longer but creates ready-to-post content
Tip: Use Preview first to fine-tune your settings!"""
        ToolTip(self.generate_btn, generate_tooltip_text, side='right')
        # Progress frame
        progress_frame = tk.Frame(self.root)
        progress_frame.pack(pady=5, padx=20, fill="x")

        self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
        self.progress_label.pack()

        self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
        self.progress_bar.pack(pady=3)
    def select_video(self):
        file_path = filedialog.askopenfilename(
@@ -327,18 +862,44 @@ class ShortsGeneratorGUI:
        # Validate video first
        validate_video(self.video_path, min_duration=self.duration_var.get() * 2)

        # Analyze using selected detection mode
        self.preview_btn.config(state="disabled", text="Analyzing...")
        self.root.update()

        detection_mode = self.detection_mode_var.get()

        if detection_mode == "loud":
            moments = detect_loud_moments(
                self.video_path,
                chunk_duration=self.duration_var.get(),
                threshold_db=self.threshold_var.get()
            )
            mode_name = "loud moments"
        elif detection_mode == "scene":
            moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "scene changes"
        elif detection_mode == "motion":
            moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "motion moments"
        elif detection_mode == "speech":
            moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "emotional speech"
        elif detection_mode == "peaks":
            moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "audio peaks"
        elif detection_mode == "combined":
            moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "interesting moments"
        else:
            moments = detect_loud_moments(
                self.video_path,
                chunk_duration=self.duration_var.get(),
                threshold_db=self.threshold_var.get()
            )
            mode_name = "loud moments"

        if not moments:
            messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
            return

        # Show preview window
@@ -346,7 +907,7 @@ class ShortsGeneratorGUI:
        preview_window.title("Preview and Select Clips")
        preview_window.geometry("500x400")

        tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)

        # Create scrollable frame for checkboxes
        canvas = tk.Canvas(preview_window)
@@ -364,7 +925,7 @@ class ShortsGeneratorGUI:
        # Store checkbox variables and clip data
        self.clip_vars = []
        # Use all clips if max clips is disabled, otherwise limit by setting
        clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
        self.preview_clips_data = clips_to_show

        # Add selectable clips with checkboxes
@@ -510,11 +1071,12 @@ class ShortsGeneratorGUI:
            generate_shorts(
                self.video_path,
                max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,  # Default cap when the limit is unchecked
                output_folder=self.output_folder,
                progress_callback=self.update_progress,
                threshold_db=self.threshold_var.get(),
                clip_duration=self.duration_var.get(),
                detection_mode=self.detection_mode_var.get()
            )
            messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
        except FileNotFoundError as e:
@@ -547,18 +1109,14 @@ def run_gui():
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
        # Run command line mode
        try:
            generate_shorts(sys.argv[1])
            print("✅ Shorts generation completed successfully!")
        except Exception as e:
            print(f"❌ Error: {str(e)}")
    else:
        # Run GUI mode (default)
        run_gui()
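# Invocation sketch:
#   python shorts_generator2.py your_video.mp4   # command-line generation
#   python shorts_generator2.py --gui            # explicit GUI
#   python shorts_generator2.py                  # no args also launches the GUI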

View File

@@ -1,4 +1,8 @@
1
00:00:00,000 --> 00:00:01,280
Yeah! Yeah!

2
00:00:06,000 --> 00:00:07,809
FLAVOR CHEESE 24!