From bd55be0448c86bc0fc9c4a700b95dc09598f3840 Mon Sep 17 00:00:00 2001
From: klop51
Date: Sat, 9 Aug 2025 15:45:24 +0200
Subject: [PATCH] Update requirements and enhance shorts generator with advanced detection modes and tooltips

---
 requirements.txt     |   3 +
 shorts_generator2.py | 626 ++++++++++++++++++++++++++++++++++++++++---
 sub2.srt             |   8 +-
 3 files changed, 601 insertions(+), 36 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ea776d5..bad8a65 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
 moviepy>=1.0.3
 faster-whisper>=0.10.0
 numpy>=1.21.0
+opencv-python>=4.5.0
+scipy>=1.7.0
+librosa>=0.9.0
diff --git a/shorts_generator2.py b/shorts_generator2.py
index 49d0452..e4dfa82 100644
--- a/shorts_generator2.py
+++ b/shorts_generator2.py
@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
 import tkinter as tk
 from tkinter import filedialog, messagebox, ttk
 import threading
+import cv2
+from scipy import signal
+import librosa
+
+class ToolTip:
+    """Create a tooltip for a given widget"""
+    def __init__(self, widget, text='widget info', side='right'):
+        self.widget = widget
+        self.text = text
+        self.side = side
+        self.widget.bind("<Enter>", self.enter)
+        self.widget.bind("<Leave>", self.leave)
+        self.tipwindow = None
+
+    def enter(self, event=None):
+        self.showtip()
+
+    def leave(self, event=None):
+        self.hidetip()
+
+    def showtip(self):
+        if self.tipwindow or not self.text:
+            return
+
+        # Get widget position
+        x = self.widget.winfo_rootx()
+        y = self.widget.winfo_rooty()
+        w = self.widget.winfo_width()
+        h = self.widget.winfo_height()
+
+        # Position tooltip based on side preference
+        if self.side == 'right':
+            x = x + w + 10  # 10px to the right of widget
+            y = y
+        else:
+            x = x + 25
+            y = y + h + 5
+
+        self.tipwindow = tw = tk.Toplevel(self.widget)
+        tw.wm_overrideredirect(True)
+        tw.wm_geometry("+%d+%d" % (x, y))
+        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
+                         background="#ffffe0", relief=tk.SOLID, borderwidth=1,
+                         font=("Arial", "9", "normal"), wraplength=350)
+        label.pack(ipadx=5, ipady=3)
+
+    def hidetip(self):
+        tw = self.tipwindow
+        self.tipwindow = None
+        if tw:
+            tw.destroy()
+
+class ComboboxTooltip:
+    """Special tooltip for combobox that shows on listbox hover"""
+    def __init__(self, combobox, descriptions):
+        self.combobox = combobox
+        self.descriptions = descriptions
+        self.tipwindow = None
+        self.bound_listbox = None
+
+        # Bind to combobox events
+        self.combobox.bind("<Button-1>", self.on_click)
+        self.combobox.bind("<KeyPress>", self.on_keypress)
+
+    def on_click(self, event):
+        # Try to find the listbox when dropdown opens
+        self.combobox.after(50, self.bind_listbox)
+
+    def on_keypress(self, event):
+        # Handle keyboard navigation
+        self.combobox.after(50, self.bind_listbox)
+
+    def bind_listbox(self):
+        # Find the listbox widget more reliably
+        try:
+            # Look through all toplevel windows for the combobox popdown
+            for window in self.combobox.winfo_toplevel().winfo_children():
+                window_class = window.winfo_class()
+                if window_class == 'Toplevel':
+                    # Found a toplevel, look for listbox inside
+                    for child in window.winfo_children():
+                        if child.winfo_class() == 'Listbox':
+                            if self.bound_listbox != child:
+                                self.bound_listbox = child
+                                child.bind("<Motion>", self.on_listbox_motion)
+                                child.bind("<Leave>", self.on_listbox_leave)
+                                child.bind("<ButtonRelease-1>", self.on_listbox_leave)
+                            return
+        except Exception as e:
+            # Fallback method - try to find any listbox
+            try:
+                # Alternative approach: look for the popdown frame
+                for child in self.combobox.tk.call('winfo', 'children', '.'):
+                    if 'popdown' in str(child):
+                        popdown = self.combobox.nametowidget(child)
+                        for subchild in popdown.winfo_children():
+                            if subchild.winfo_class() == 'Listbox':
+                                if self.bound_listbox != subchild:
+                                    self.bound_listbox = subchild
+                                    subchild.bind("<Motion>", self.on_listbox_motion)
+                                    subchild.bind("<Leave>", self.on_listbox_leave)
+                                    subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
+                                return
+            except:
+                pass
+
+    def on_listbox_motion(self, event):
+        try:
+            listbox = event.widget
+            index = listbox.nearest(event.y)
+            if 0 <= index < len(self.combobox['values']):
+                selection = self.combobox['values'][index]
+                if selection in self.descriptions:
+                    self.show_tooltip(event, self.descriptions[selection])
+        except Exception:
+            pass
+
+    def on_listbox_leave(self, event):
+        self.hide_tooltip()
+
+    def show_tooltip(self, event, text):
+        self.hide_tooltip()  # Hide any existing tooltip
+
+        try:
+            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
+            y = event.widget.winfo_rooty() + event.y - 20
+
+            self.tipwindow = tw = tk.Toplevel(event.widget)
+            tw.wm_overrideredirect(True)
+            tw.wm_geometry("+%d+%d" % (x, y))
+            label = tk.Label(tw, text=text, justify=tk.LEFT,
+                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
+                             font=("Arial", "9", "normal"), wraplength=350)
+            label.pack(ipadx=5, ipady=3)
+        except Exception:
+            pass
+
+    def hide_tooltip(self):
+        if self.tipwindow:
+            try:
+                self.tipwindow.destroy()
+            except:
+                pass
+            self.tipwindow = None

 def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
     print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
     clip.close()
     return loud_chunks

+def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
+    """Detect dramatic visual scene changes"""
+    print("🎬 Analyzing scene changes...")
+    clip = VideoFileClip(video_path)
+
+    # Sample frames at regular intervals
+    sample_rate = 2  # Check every 2 seconds
+    times = np.arange(0, clip.duration, sample_rate)
+
+    scene_changes = []
+    prev_frame = None
+
+    for i, t in enumerate(times[:-1]):
+        try:
+            # Get current and next frame
+            frame1 = clip.get_frame(t)
+            frame2 = clip.get_frame(times[i + 1])
+
+            # Convert to grayscale and resize for faster processing
+            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
+            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
+            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
+            gray2 = cv2.resize(gray2, (160, 90))
+
+            # Mean absolute pixel difference between frames (0-1 range)
+            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0
+
+            if diff > threshold:
+                start = max(0, t - chunk_duration/2)
+                end = min(clip.duration, t + chunk_duration/2)
+                scene_changes.append((start, end))
+
+        except Exception as e:
+            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
+            continue
+
+    print(f"🎬 Found {len(scene_changes)} scene changes")
+    clip.close()
+    return scene_changes
+
+def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
+    """Detect high motion/action moments"""
+    print("🏃 Analyzing motion intensity...")
+    clip = VideoFileClip(video_path)
+
+    sample_rate = 1  # Check every second
+    times = np.arange(0, clip.duration - 1, sample_rate)
+
+    motion_moments = []
+
+    for t in times:
+        try:
+            # Get two consecutive frames
+            frame1 = clip.get_frame(t)
+            frame2 = clip.get_frame(t + 0.5)  # Half second later
+
+            # Convert to grayscale and resize
+            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
+            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
+            gray1 = cv2.resize(gray1, (160, 90))
+            gray2 = cv2.resize(gray2, (160, 90))
+
+            # Track a sparse set of sample points with Lucas-Kanade optical flow
+            p0 = (np.random.rand(100, 1, 2) * [160, 90]).astype(np.float32)
+            p1, status, _ = cv2.calcOpticalFlowPyrLK(gray1, gray2, p0, None)
+
+            if p1 is not None:
+                # Motion = mean displacement of the tracked points (in pixels)
+                motion_magnitude = np.mean(np.linalg.norm((p1 - p0).reshape(-1, 2), axis=1))
+
+                if motion_magnitude > threshold:
+                    start = max(0, t - chunk_duration/2)
+                    end = min(clip.duration, t + chunk_duration/2)
+                    motion_moments.append((start, end))
+
+        except Exception as e:
+            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
+            continue
+
+    print(f"🏃 Found {len(motion_moments)} high-motion moments")
+    clip.close()
+    return motion_moments
+
+def detect_speech_emotion(video_path, chunk_duration=5):
+    """Detect emotional/excited speech patterns"""
+    print("😄 Analyzing speech emotions...")
+
+    # Use Whisper to get detailed speech analysis
+    model = WhisperModel("base", device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)
+
+    emotional_moments = []
+    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
+                           'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']
+
+    for segment in segments:
+        text = segment.text.lower()
+
+        # Check for excitement keywords
+        has_keywords = any(keyword in text for keyword in excitement_keywords)
+
+        # Check for multiple exclamation-worthy patterns
+        has_caps = any(word.isupper() for word in segment.text.split())
+        has_punctuation = '!' in segment.text or '?' in segment.text
+        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)
+
+        if has_keywords or has_punctuation or is_short_excited:
+            start = max(0, segment.start - chunk_duration/2)
+            end = min(segment.end + chunk_duration/2, segment.end + chunk_duration)
+            emotional_moments.append((start, end))
+
+    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
+    return emotional_moments
+
+def detect_audio_peaks(video_path, chunk_duration=5):
+    """Detect sudden audio peaks (bass drops, beats, impacts)"""
+    print("🎵 Analyzing audio peaks...")
+
+    clip = VideoFileClip(video_path)
+    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed
+
+    # Convert to mono if stereo
+    if len(audio.shape) > 1:
+        audio = np.mean(audio, axis=1)
+
+    # Find amplitude peaks (loudest ~5% of samples)
+    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))
+
+    peak_moments = []
+    prev_peak = 0
+
+    for peak in peaks:
+        peak_time = peak / 22050
+
+        # Avoid too close peaks
+        if peak_time - prev_peak > chunk_duration:
+            start = max(0, peak_time - chunk_duration/2)
+            end = min(clip.duration, peak_time + chunk_duration/2)
+            peak_moments.append((start, end))
+            prev_peak = peak_time
+
+    print(f"🎵 Found {len(peak_moments)} audio peak moments")
+    clip.close()
+    return peak_moments
+
+def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
+    """Combine multiple detection methods for best moments"""
+    print("🎯 Running comprehensive moment analysis...")
+
+    if weights is None:
+        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}
+
+    # Get all detection results
+    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
+    scene_moments = detect_scene_changes(video_path, chunk_duration)
+    motion_moments = detect_motion_intensity(video_path, chunk_duration)
+    speech_moments = detect_speech_emotion(video_path, chunk_duration)
+    peak_moments =
detect_audio_peaks(video_path, chunk_duration) + + # Create time-based scoring + clip = VideoFileClip(video_path) + duration = clip.duration + clip.close() + + # Score each second of the video + time_scores = {} + + for moments, weight in [(loud_moments, weights['loud']), + (scene_moments, weights['scene']), + (motion_moments, weights['motion']), + (speech_moments, weights['speech']), + (peak_moments, weights['peaks'])]: + for start, end in moments: + for t in range(int(start), int(end) + 1): + if t not in time_scores: + time_scores[t] = 0 + time_scores[t] += weight + + # Find the highest scoring segments + if not time_scores: + return loud_moments # Fallback to loud moments + + # Get top scoring time periods + sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True) + + combined_moments = [] + used_times = set() + + for time_sec, score in sorted_times: + if time_sec not in used_times and score > 0.3: # Minimum threshold + start = max(0, time_sec - chunk_duration/2) + end = min(duration, time_sec + chunk_duration/2) + combined_moments.append((start, end)) + + # Mark nearby times as used to avoid overlap + for t in range(max(0, time_sec - chunk_duration), + min(int(duration), time_sec + chunk_duration)): + used_times.add(t) + + print(f"🎯 Found {len(combined_moments)} high-intensity combined moments") + return combined_moments + def transcribe_and_extract_subtitles(video_path, start, end): print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...") model = WhisperModel("base", device="cpu", compute_type="int8") @@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30): else: raise ValueError(f"Error reading video: {str(e)}") -def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None, threshold_db=-30, clip_duration=5): +def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None, + threshold_db=-30, clip_duration=5, detection_mode="loud"): os.makedirs(output_folder, exist_ok=True) # Validate video first @@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca progress_callback(f"❌ Video validation failed", 0) raise e - if progress_callback: - progress_callback("🔍 Analyzing audio for loud moments...", 10) + # Choose detection method based on mode + if detection_mode == "loud": + if progress_callback: + progress_callback("🔍 Analyzing audio for loud moments...", 10) + best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db) + elif detection_mode == "scene": + if progress_callback: + progress_callback("🎬 Analyzing scene changes...", 10) + best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration) + elif detection_mode == "motion": + if progress_callback: + progress_callback("🏃 Analyzing motion intensity...", 10) + best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration) + elif detection_mode == "speech": + if progress_callback: + progress_callback("😄 Analyzing speech emotions...", 10) + best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration) + elif detection_mode == "peaks": + if progress_callback: + progress_callback("🎵 Analyzing audio peaks...", 10) + best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration) + elif detection_mode == "combined": + if progress_callback: + progress_callback("🎯 Running comprehensive analysis...", 10) + best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration) + 
else: + best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db) - best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db) selected = best_moments[:max_clips] if not selected: - raise ValueError(f"No loud moments found with threshold {threshold_db} dB. Try lowering the threshold or use a different video.") + mode_name = { + "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity", + "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments" + }.get(detection_mode, "moments") + raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.") if progress_callback: progress_callback(f"📊 Found {len(selected)} clips to generate", 20) @@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca class ShortsGeneratorGUI: def __init__(self, root): self.root = root - self.root.title("AI Shorts Generator") - self.root.geometry("500x400") + self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection") + self.root.geometry("650x650") # Reduced height to eliminate empty space + self.root.minsize(600, 600) # Reduced minimum size self.video_path = None self.output_folder = "shorts" @@ -265,43 +641,202 @@ class ShortsGeneratorGUI: self.use_max_clips.trace("w", lambda *args: toggle_clips_limit()) clips_checkbox.config(command=toggle_clips_limit) - # Audio threshold + # Add tooltip for max clips setting + clips_tooltip_text = """Max Clips Control: + +• Checked: Limit the number of clips generated +• Unchecked: Generate all detected moments +• 1-3 clips: Quick highlights for social media +• 4-6 clips: Good variety pack +• 7-10 clips: Comprehensive highlight reel + +Tip: Start with 3 clips, then increase if you want more content""" + ToolTip(self.clips_spinbox, clips_tooltip_text, side='right') + ToolTip(clips_checkbox, clips_tooltip_text, side='right') + + # Detection Mode Selection + detection_frame = tk.Frame(settings_frame) + detection_frame.pack(fill="x", pady=5) + tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left") + + self.detection_mode_var = tk.StringVar(value="loud") + self.detection_display_var = tk.StringVar(value="🔊 Loud Moments") + + detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var, + values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity", + "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"], + state="readonly", width=22) + detection_dropdown.pack(side="right") + + # Store the mapping between display text and internal values + self.mode_mapping = { + "🔊 Loud Moments": "loud", + "🎬 Scene Changes": "scene", + "🏃 Motion Intensity": "motion", + "😄 Emotional Speech": "speech", + "🎵 Audio Peaks": "peaks", + "🎯 Smart Combined": "combined" + } + + # Simple, clear descriptions for mode tooltips + mode_descriptions = { + "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video. + +• Best for: Gaming reactions, music highlights, shouting moments +• Finds: High-volume audio segments above the threshold +• Ideal when: Your video has clear volume differences +• Tip: Adjust threshold if too many/few moments found""", + + "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video. 
+
+• Best for: Movie trailers, montages, location changes
+• Finds: Major visual shifts between frames
+• Ideal when: Video has multiple scenes or camera angles
+• Tip: Great for content with quick cuts or transitions""",
+
+            "🏃 Motion Intensity": """Analyzes movement and action within video frames.
+
+• Best for: Sports highlights, dance videos, action scenes
+• Finds: High-movement moments with lots of visual activity
+• Ideal when: Video contains physical action or movement
+• Tip: Perfect for extracting the most dynamic moments""",
+
+            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
+
+• Best for: Reactions, reviews, commentary, tutorials
+• Finds: Words like 'wow', 'amazing', exclamations, excited tone
+• Ideal when: Video has spoken content with emotional moments
+• Tip: Captures the most engaging verbal reactions""",
+
+            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
+
+• Best for: Music videos, sound effect moments, beat drops
+• Finds: Sharp increases in audio frequency or volume
+• Ideal when: Video has musical elements or sound effects
+• Tip: Great for rhythm-based or audio-driven content""",
+
+            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
+
+• Best for: Any video type, general content, unsure what to use
+• Finds: Moments scoring high across multiple analysis methods
+• Ideal when: You want the most 'interesting' overall moments
+• Tip: Recommended starting point for most videos"""
+        }
+
+        # Create tooltip for the dropdown (updates when selection changes)
+        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
+        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)
+
+        # Update tooltip when selection changes
+        def on_detection_change(event):
+            selection = detection_dropdown.get()
+            mode_map = {
+                "🔊 Loud Moments": "loud",
+                "🎬 Scene Changes": "scene",
+                "🏃 Motion Intensity": "motion",
+                "😄 Emotional Speech": "speech",
+                "🎵 Audio Peaks": "peaks",
+                "🎯 Smart Combined": "combined"
+            }
+            self.detection_mode_var.set(mode_map.get(selection, "loud"))
+
+            # Update tooltip text for the selected mode
+            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
+
+            # Show/hide threshold setting based on mode
+            if selection == "🔊 Loud Moments":
+                threshold_frame.pack(fill="x", pady=5)
+            else:
+                threshold_frame.pack_forget()
+
+        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
+
+        # Audio threshold (only shown for loud moments)
         threshold_frame = tk.Frame(settings_frame)
         threshold_frame.pack(fill="x", pady=5)
-        tk.Label(threshold_frame, text="Audio Threshold (dB):").pack(side="left")
+        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
+        threshold_label.pack(side="left")
         self.threshold_var = tk.IntVar(value=-30)
         threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
         threshold_spinbox.pack(side="right")
+        # Add tooltip for threshold setting
+        threshold_tooltip_text = """Audio Threshold Control:
+
+• Higher values (closer to 0): Only very loud moments
+• Lower values (closer to -50): More moments detected
+• Default -30 dB: Good balance for most videos
+• Adjust based on your video's audio levels
+
+Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
+        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
+
         # Clip duration (increased to 120 seconds max)
         duration_frame = tk.Frame(settings_frame)
duration_frame.pack(fill="x", pady=5) - tk.Label(duration_frame, text="Clip Duration (seconds):").pack(side="left") + duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):") + duration_label.pack(side="left") self.duration_var = tk.IntVar(value=5) duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var) duration_spinbox.pack(side="right") + # Add tooltip for duration setting + duration_tooltip_text = """Clip Duration Setting: + +• 3-10 seconds: Perfect for TikTok/Instagram Reels +• 10-30 seconds: Good for YouTube Shorts +• 30-60 seconds: Longer form highlights +• 60+ seconds: Extended content clips + +Shorter clips = more viral potential +Longer clips = more context and story""" + ToolTip(duration_spinbox, duration_tooltip_text, side='right') + # Preview button self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips", command=self.preview_clips, bg="#2196F3", fg="white", font=("Arial", 10, "bold"), pady=5) - self.preview_btn.pack(pady=10) + self.preview_btn.pack(pady=5) + + # Add tooltip for preview button + preview_tooltip_text = """Preview Clips Feature: + +• Analyzes your video using the selected detection mode +• Shows all detected moments with timestamps +• Lets you select specific clips to generate +• No video files created - just analysis +• Great for testing settings before full generation + +Tip: Always preview first to see what the AI finds!""" + ToolTip(self.preview_btn, preview_tooltip_text, side='right') # Generate button self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts", command=self.start_generation, bg="#4CAF50", fg="white", font=("Arial", 12, "bold"), pady=10) - self.generate_btn.pack(pady=20) + self.generate_btn.pack(pady=10) + + # Add tooltip for generate button + generate_tooltip_text = """Generate Shorts Feature: + +• Creates actual video files from detected moments +• Adds AI-generated subtitles to each clip +• Formats videos for vertical social media (1080x1920) +• Saves clips to your selected output folder +• Takes longer but creates ready-to-post content + +Tip: Use Preview first to fine-tune your settings!""" + ToolTip(self.generate_btn, generate_tooltip_text, side='right') # Progress frame progress_frame = tk.Frame(self.root) - progress_frame.pack(pady=10, padx=20, fill="x") + progress_frame.pack(pady=5, padx=20, fill="x") self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts") self.progress_label.pack() self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate") - self.progress_bar.pack(pady=5) + self.progress_bar.pack(pady=3) def select_video(self): file_path = filedialog.askopenfilename( @@ -327,18 +862,44 @@ class ShortsGeneratorGUI: # Validate video first validate_video(self.video_path, min_duration=self.duration_var.get() * 2) - # Analyze for loud moments + # Analyze using selected detection mode self.preview_btn.config(state="disabled", text="Analyzing...") self.root.update() - loud_moments = detect_loud_moments( - self.video_path, - chunk_duration=self.duration_var.get(), - threshold_db=self.threshold_var.get() - ) + detection_mode = self.detection_mode_var.get() - if not loud_moments: - messagebox.showinfo("Preview", f"No loud moments found with threshold {self.threshold_var.get()} dB.\nTry lowering the threshold.") + if detection_mode == "loud": + moments = detect_loud_moments( + self.video_path, + chunk_duration=self.duration_var.get(), + threshold_db=self.threshold_var.get() + ) + mode_name = "loud moments" + elif 
detection_mode == "scene": + moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get()) + mode_name = "scene changes" + elif detection_mode == "motion": + moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get()) + mode_name = "motion moments" + elif detection_mode == "speech": + moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get()) + mode_name = "emotional speech" + elif detection_mode == "peaks": + moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get()) + mode_name = "audio peaks" + elif detection_mode == "combined": + moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get()) + mode_name = "interesting moments" + else: + moments = detect_loud_moments( + self.video_path, + chunk_duration=self.duration_var.get(), + threshold_db=self.threshold_var.get() + ) + mode_name = "loud moments" + + if not moments: + messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.") return # Show preview window @@ -346,7 +907,7 @@ class ShortsGeneratorGUI: preview_window.title("Preview and Select Clips") preview_window.geometry("500x400") - tk.Label(preview_window, text=f"Found {len(loud_moments)} loud moments:", font=("Arial", 12, "bold")).pack(pady=10) + tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10) # Create scrollable frame for checkboxes canvas = tk.Canvas(preview_window) @@ -364,7 +925,7 @@ class ShortsGeneratorGUI: # Store checkbox variables and clip data self.clip_vars = [] # Use all clips if max clips is disabled, otherwise limit by setting - clips_to_show = loud_moments if not self.use_max_clips.get() else loud_moments[:self.clips_var.get()] + clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()] self.preview_clips_data = clips_to_show # Add selectable clips with checkboxes @@ -510,11 +1071,12 @@ class ShortsGeneratorGUI: generate_shorts( self.video_path, - max_clips=self.clips_var.get() if self.use_max_clips.get() else len(detect_loud_moments(self.video_path, chunk_duration=self.duration_var.get(), threshold_db=self.threshold_var.get())), + max_clips=self.clips_var.get() if self.use_max_clips.get() else 10, # Default max for non-loud modes output_folder=self.output_folder, progress_callback=self.update_progress, threshold_db=self.threshold_var.get(), - clip_duration=self.duration_var.get() + clip_duration=self.duration_var.get(), + detection_mode=self.detection_mode_var.get() ) messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!") except FileNotFoundError as e: @@ -547,18 +1109,14 @@ def run_gui(): if __name__ == "__main__": import sys - if len(sys.argv) > 1 and sys.argv[1] == "--gui": - # Run GUI mode - run_gui() - elif len(sys.argv) < 2: - print("Usage: python shorts_generator2.py your_video.mp4") - print(" or: python shorts_generator2.py --gui") - run_gui() # Default to GUI if no args - else: + if len(sys.argv) > 1 and sys.argv[1] != "--gui": # Run command line mode try: generate_shorts(sys.argv[1]) print("✅ Shorts generation completed successfully!") except Exception as e: print(f"❌ Error: {str(e)}") + else: + # Run GUI mode (default) + run_gui() diff --git a/sub2.srt b/sub2.srt index ec5ac54..bf75ea1 100644 --- a/sub2.srt +++ b/sub2.srt @@ -1,4 +1,8 @@ 1 -00:00:00,000 --> 00:00:08,250 -Yeah, yeah, level she's 24. 
+00:00:00,000 --> 00:00:01,280
+Yeah! Yeah!
+
+2
+00:00:06,000 --> 00:00:07,809
+FLAVOR CHEESE 24!
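
Usage sketch (illustrative, not part of the patch): the updated generate_shorts() entry point takes a detection_mode argument with the values "loud", "scene", "motion", "speech", "peaks", or "combined". A minimal call, assuming the module is importable as shorts_generator2 and "my_video.mp4" stands in for a real input file:

    from shorts_generator2 import generate_shorts

    # "combined" blends loudness, scene-change, motion, emotional-speech, and audio-peak scores
    generate_shorts(
        "my_video.mp4",            # placeholder path to an input video
        max_clips=3,
        output_folder="shorts",
        threshold_db=-30,          # only used by the "loud" detection mode
        clip_duration=10,
        detection_mode="combined",
    )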