"""AI Shorts Generator.

Detects interesting moments in a video (loud audio, scene changes, motion,
emotional speech, audio peaks, or a weighted combination), cuts them into
vertical 1080x1920 clips with word-by-word highlighted subtitles, and exposes
both a Tkinter GUI and a simple command-line mode.

Requires: moviepy, faster-whisper, opencv-python (cv2), numpy, scipy, librosa.
"""

import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa


class ToolTip:
    """Create a tooltip for a given widget"""

    def __init__(self, widget, text='widget info', side='right'):
        self.widget = widget
        self.text = text
        self.side = side
        self.widget.bind("<Enter>", self.enter)
        self.widget.bind("<Leave>", self.leave)
        self.tipwindow = None

    def enter(self, event=None):
        self.showtip()

    def leave(self, event=None):
        self.hidetip()

    def showtip(self):
        if self.tipwindow or not self.text:
            return
        # Get widget position
        x = self.widget.winfo_rootx()
        y = self.widget.winfo_rooty()
        w = self.widget.winfo_width()
        h = self.widget.winfo_height()
        # Position tooltip based on side preference
        if self.side == 'right':
            x = x + w + 10  # 10px to the right of widget
        else:
            x = x + 25
            y = y + h + 5
        self.tipwindow = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)
        tw.wm_geometry("+%d+%d" % (x, y))
        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
                         background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                         font=("Arial", "9", "normal"), wraplength=350)
        label.pack(ipadx=5, ipady=3)

    def hidetip(self):
        tw = self.tipwindow
        self.tipwindow = None
        if tw:
            tw.destroy()


class ComboboxTooltip:
    """Special tooltip for combobox that shows on listbox hover"""

    def __init__(self, combobox, descriptions):
        self.combobox = combobox
        self.descriptions = descriptions
        self.tipwindow = None
        self.bound_listbox = None
        # Bind to combobox events
        self.combobox.bind("<Button-1>", self.on_click)
        self.combobox.bind("<KeyPress>", self.on_keypress)

    def on_click(self, event):
        # Try to find the listbox when dropdown opens
        self.combobox.after(50, self.bind_listbox)

    def on_keypress(self, event):
        # Handle keyboard navigation
        self.combobox.after(50, self.bind_listbox)

    def bind_listbox(self):
        # Find the listbox widget more reliably
        try:
            # Look through all toplevel windows for the combobox popdown
            for window in self.combobox.winfo_toplevel().winfo_children():
                window_class = window.winfo_class()
                if window_class == 'Toplevel':
                    # Found a toplevel, look for listbox inside
                    for child in window.winfo_children():
                        if child.winfo_class() == 'Listbox':
                            if self.bound_listbox != child:
                                self.bound_listbox = child
                                child.bind("<Motion>", self.on_listbox_motion)
                                child.bind("<Leave>", self.on_listbox_leave)
                                # Also hide the tip when an item is clicked
                                child.bind("<ButtonPress>", self.on_listbox_leave)
                            return
        except Exception as e:
            # Fallback method - try to find any listbox
            try:
                # Alternative approach: look for the popdown frame
                for child in self.combobox.tk.call('winfo', 'children', '.'):
                    if 'popdown' in str(child):
                        popdown = self.combobox.nametowidget(child)
                        for subchild in popdown.winfo_children():
                            if subchild.winfo_class() == 'Listbox':
                                if self.bound_listbox != subchild:
                                    self.bound_listbox = subchild
                                    subchild.bind("<Motion>", self.on_listbox_motion)
                                    subchild.bind("<Leave>", self.on_listbox_leave)
                                    subchild.bind("<ButtonPress>", self.on_listbox_leave)
                                return
            except Exception:
                pass

    def on_listbox_motion(self, event):
        try:
            listbox = event.widget
            index = listbox.nearest(event.y)
            if 0 <= index < len(self.combobox['values']):
                selection = self.combobox['values'][index]
                if selection in self.descriptions:
                    self.show_tooltip(event, self.descriptions[selection])
        except Exception:
            pass

    def on_listbox_leave(self, event):
        self.hide_tooltip()

    def show_tooltip(self, event, text):
        self.hide_tooltip()  # Hide any existing tooltip
        try:
            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
            y = event.widget.winfo_rooty() + event.y - 20
            self.tipwindow = tw = tk.Toplevel(event.widget)
            tw.wm_overrideredirect(True)
            tw.wm_geometry("+%d+%d" % (x, y))
            label = tk.Label(tw, text=text, justify=tk.LEFT,
                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                             font=("Arial", "9", "normal"), wraplength=350)
            label.pack(ipadx=5, ipady=3)
        except Exception:
            pass

    def hide_tooltip(self):
        if self.tipwindow:
            try:
                self.tipwindow.destroy()
            except Exception:
                pass
            self.tipwindow = None


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
    """Detect dramatic visual scene changes"""
    print("🎬 Analyzing scene changes...")
    clip = VideoFileClip(video_path)

    # Sample frames at regular intervals
    sample_rate = 2  # Check every 2 seconds
    times = np.arange(0, clip.duration, sample_rate)
    scene_changes = []

    for i, t in enumerate(times[:-1]):
        try:
            # Get current and next frame
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(times[i + 1])

            # Convert to grayscale and resize for faster processing
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
            gray2 = cv2.resize(gray2, (160, 90))

            # Mean absolute difference between frames (scene-change proxy)
            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0
            if diff > threshold:
                start = max(0, t - chunk_duration / 2)
                end = min(clip.duration, t + chunk_duration / 2)
                scene_changes.append((start, end))
        except Exception as e:
            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
            continue

    print(f"🎬 Found {len(scene_changes)} scene changes")
    clip.close()
    return scene_changes


def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
    """Detect high motion/action moments"""
    print("🏃 Analyzing motion intensity...")
    clip = VideoFileClip(video_path)

    sample_rate = 1  # Check every second
    times = np.arange(0, clip.duration - 1, sample_rate)
    motion_moments = []

    for t in times:
        try:
            # Get two frames half a second apart
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(t + 0.5)

            # Convert to grayscale and resize for speed
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))
            gray2 = cv2.resize(gray2, (160, 90))

            # Track a sparse set of random points between the two frames
            points = np.column_stack([
                np.random.randint(0, 160, 100),  # x within frame width
                np.random.randint(0, 90, 100),   # y within frame height
            ]).reshape(-1, 1, 2).astype(np.float32)
            next_points, status, _ = cv2.calcOpticalFlowPyrLK(gray1, gray2, points, None)

            if next_points is not None and status is not None:
                tracked = status.ravel() == 1
                if tracked.any():
                    # Mean displacement of tracked points, as a fraction of frame width
                    displacement = np.linalg.norm(
                        (next_points - points).reshape(-1, 2)[tracked], axis=1)
                    motion_magnitude = np.mean(displacement) / 160.0
                    if motion_magnitude > threshold:
                        start = max(0, t - chunk_duration / 2)
                        end = min(clip.duration, t + chunk_duration / 2)
                        motion_moments.append((start, end))
        except Exception as e:
            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
            continue

    print(f"🏃 Found {len(motion_moments)} high-motion moments")
    clip.close()
    return motion_moments


def detect_speech_emotion(video_path, chunk_duration=5):
    """Detect emotional/excited speech patterns"""
    print("😄 Analyzing speech emotions...")
    # Use Whisper to get detailed speech analysis
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)

    emotional_moments = []
    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome',
                           'fantastic', 'omg', 'what', 'no way', 'crazy', 'insane',
                           'perfect', 'yes', 'exactly']

    for segment in segments:
        text = segment.text.lower()
        # Check for excitement keywords
        has_keywords = any(keyword in text for keyword in excitement_keywords)
        # Check for other exclamation-worthy patterns
        has_caps = any(word.isupper() for word in segment.text.split())
        has_punctuation = '!' in segment.text or '?' in segment.text
        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)
        if has_keywords or has_punctuation or is_short_excited:
            start = max(0, segment.start - chunk_duration / 2)
            end = min(segment.end + chunk_duration / 2, segment.end + chunk_duration)
            emotional_moments.append((start, end))

    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
    return emotional_moments


def detect_audio_peaks(video_path, chunk_duration=5):
    """Detect sudden audio peaks (bass drops, beats, impacts)"""
    print("🎵 Analyzing audio peaks...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed

    # Convert to mono if stereo
    if len(audio.shape) > 1:
        audio = np.mean(audio, axis=1)

    # Find spectral peaks (bass, treble spikes)
    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))

    peak_moments = []
    prev_peak = 0
    for peak in peaks:
        peak_time = peak / 22050
        # Avoid too close peaks
        if peak_time - prev_peak > chunk_duration:
            start = max(0, peak_time - chunk_duration / 2)
            end = min(clip.duration, peak_time + chunk_duration / 2)
            peak_moments.append((start, end))
            prev_peak = peak_time

    print(f"🎵 Found {len(peak_moments)} audio peak moments")
    clip.close()
    return peak_moments


def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
    """Combine multiple detection methods for best moments"""
    print("🎯 Running comprehensive moment analysis...")
    if weights is None:
        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

    # Get all detection results
    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
    scene_moments = detect_scene_changes(video_path, chunk_duration)
    motion_moments = detect_motion_intensity(video_path, chunk_duration)
    speech_moments = detect_speech_emotion(video_path, chunk_duration)
    peak_moments = detect_audio_peaks(video_path, chunk_duration)

    # Create time-based scoring
    clip = VideoFileClip(video_path)
    duration = clip.duration
    clip.close()

    # Score each second of the video
    time_scores = {}
    for moments, weight in [(loud_moments, weights['loud']),
                            (scene_moments, weights['scene']),
                            (motion_moments, weights['motion']),
                            (speech_moments, weights['speech']),
                            (peak_moments, weights['peaks'])]:
        for start, end in moments:
            for t in range(int(start), int(end) + 1):
                if t not in time_scores:
                    time_scores[t] = 0
                time_scores[t] += weight

    # Find the highest scoring segments
    if not time_scores:
        return loud_moments  # Fallback to loud moments

    # Get top scoring time periods
    sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
    combined_moments = []
    used_times = set()
    for time_sec, score in sorted_times:
        if time_sec not in used_times and score > 0.3:  # Minimum threshold
            start = max(0, time_sec - chunk_duration / 2)
            end = min(duration, time_sec + chunk_duration / 2)
            combined_moments.append((start, end))
            # Mark nearby times as used to avoid overlap
            for t in range(max(0, time_sec - chunk_duration),
                           min(int(duration), time_sec + chunk_duration)):
                used_times.add(t)

    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
    return combined_moments


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)
    clips = [vertical_clip]
    subtitle_y_px = 1550  # Fixed Y position for subtitles

    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            words = text.strip().split()
            if not words:
                continue

            # Split into small readable chunks (max ~2 words or 25 characters)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()

                # Base subtitle
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)

                # Highlighted words (aligned with the base subtitle)
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is center X of 1080px width
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size
                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x - 125, subtitle_y_px))
                    clips.append(highlighted_word)
                    current_x += word_width + 20  # Add spacing between words

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def validate_video(video_path, min_duration=30):
    """Validate video file and return duration"""
    try:
        clip = VideoFileClip(video_path)
        duration = clip.duration
        clip.close()
    except Exception as e:
        if "No such file" in str(e):
            raise FileNotFoundError(f"Video file not found: {video_path}")
        elif "could not open" in str(e).lower():
            raise ValueError(f"Invalid or corrupted video file: {video_path}")
        else:
            raise ValueError(f"Error reading video: {str(e)}")
    if duration < min_duration:
        raise ValueError(f"Video is too short ({duration:.1f}s). Minimum {min_duration}s required.")
    return duration


def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
                    threshold_db=-30, clip_duration=5, detection_mode="loud"):
    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
    try:
        video_duration = validate_video(video_path, min_duration=clip_duration * 2)
        if progress_callback:
            progress_callback(f"✅ Video validated ({video_duration:.1f}s)", 5)
    except Exception as e:
        if progress_callback:
            progress_callback("❌ Video validation failed", 0)
        raise e

    # Choose detection method based on mode
    if detection_mode == "loud":
        if progress_callback:
            progress_callback("🔍 Analyzing audio for loud moments...", 10)
        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
    elif detection_mode == "scene":
        if progress_callback:
            progress_callback("🎬 Analyzing scene changes...", 10)
        best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
    elif detection_mode == "motion":
        if progress_callback:
            progress_callback("🏃 Analyzing motion intensity...", 10)
        best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
    elif detection_mode == "speech":
        if progress_callback:
            progress_callback("😄 Analyzing speech emotions...", 10)
        best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
    elif detection_mode == "peaks":
        if progress_callback:
            progress_callback("🎵 Analyzing audio peaks...", 10)
        best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
    elif detection_mode == "combined":
        if progress_callback:
            progress_callback("🎯 Running comprehensive analysis...", 10)
        best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
    else:
        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)

    selected = best_moments[:max_clips]
    if not selected:
        mode_name = {
            "loud": "loud moments",
            "scene": "scene changes",
            "motion": "motion intensity",
            "speech": "emotional speech",
            "peaks": "audio peaks",
            "combined": "interesting moments"
        }.get(detection_mode, "moments")
        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")

    if progress_callback:
        progress_callback(f"📊 Found {len(selected)} clips to generate", 20)

    for i, (start, end) in enumerate(selected):
        if progress_callback:
            progress_callback(f"🗣️ Transcribing clip {i+1}/{len(selected)}", 30 + (i * 20))
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        if progress_callback:
            progress_callback(f"🎬 Creating video {i+1}/{len(selected)}", 50 + (i * 20))
        create_short_clip(video_path, start, end, subtitles, out_path)

    if progress_callback:
        progress_callback("✅ All shorts generated successfully!", 100)


# GUI Components
class ShortsGeneratorGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
        self.root.geometry("650x650")  # Reduced height to eliminate empty space
        self.root.minsize(600, 600)  # Reduced minimum size

        self.video_path = None
        self.output_folder = "shorts"
        self.max_clips = 3
        self.threshold_db = -30
        self.clip_duration = 5

        self.create_widgets()

    def create_widgets(self):
        # Title
        title_label = tk.Label(self.root, text="🎬 AI Shorts Generator", font=("Arial", 16, "bold"))
        title_label.pack(pady=10)

        # Video selection
        video_frame = tk.Frame(self.root)
        video_frame.pack(pady=10, padx=20, fill="x")
        tk.Label(video_frame, text="Select Video File:").pack(anchor="w")
        video_select_frame = tk.Frame(video_frame)
        video_select_frame.pack(fill="x", pady=5)
        self.video_label = tk.Label(video_select_frame, text="No video selected", bg="white", relief="sunken")
        self.video_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
        tk.Button(video_select_frame, text="Browse", command=self.select_video).pack(side="right")

        # Output folder selection
        output_frame = tk.Frame(self.root)
        output_frame.pack(pady=10, padx=20, fill="x")
        tk.Label(output_frame, text="Output Folder:").pack(anchor="w")
        output_select_frame = tk.Frame(output_frame)
        output_select_frame.pack(fill="x", pady=5)
        self.output_label = tk.Label(output_select_frame, text="shorts/", bg="white", relief="sunken")
        self.output_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
        tk.Button(output_select_frame, text="Browse", command=self.select_output_folder).pack(side="right")

        # Settings frame
        settings_frame = tk.LabelFrame(self.root, text="Settings", padx=10, pady=10)
        settings_frame.pack(pady=10, padx=20, fill="x")

        # Max clips with on/off toggle
        clips_frame = tk.Frame(settings_frame)
        clips_frame.pack(fill="x", pady=5)
        clips_left_frame = tk.Frame(clips_frame)
        clips_left_frame.pack(side="left")
        self.use_max_clips = tk.BooleanVar(value=True)
        clips_checkbox = tk.Checkbutton(clips_left_frame, variable=self.use_max_clips,
                                        text="Max Clips to Generate:")
        clips_checkbox.pack(side="left")
        self.clips_var = tk.IntVar(value=3)
        self.clips_spinbox = tk.Spinbox(clips_frame, from_=1, to=10, width=5, textvariable=self.clips_var)
        self.clips_spinbox.pack(side="right")

        # Bind checkbox to enable/disable spinbox
        def toggle_clips_limit():
            if self.use_max_clips.get():
                self.clips_spinbox.config(state="normal")
            else:
                self.clips_spinbox.config(state="disabled")

        self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
        clips_checkbox.config(command=toggle_clips_limit)

        # Add tooltip for max clips setting
        clips_tooltip_text = """Max Clips Control:
• Checked: Limit the number of clips generated
• Unchecked: Generate all detected moments
• 1-3 clips: Quick highlights for social media
• 4-6 clips: Good variety pack
• 7-10 clips: Comprehensive highlight reel

Tip: Start with 3 clips, then increase if you want more content"""
        ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
        ToolTip(clips_checkbox, clips_tooltip_text, side='right')

        # Detection Mode Selection
        detection_frame = tk.Frame(settings_frame)
        detection_frame.pack(fill="x", pady=5)
        tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")
        self.detection_mode_var = tk.StringVar(value="loud")
        self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
        detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
                                          values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
                                                  "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
                                          state="readonly", width=22)
        detection_dropdown.pack(side="right")

        # Store the mapping between display text and internal values
        self.mode_mapping = {
            "🔊 Loud Moments": "loud",
            "🎬 Scene Changes": "scene",
            "🏃 Motion Intensity": "motion",
            "😄 Emotional Speech": "speech",
            "🎵 Audio Peaks": "peaks",
            "🎯 Smart Combined": "combined"
        }

        # Simple, clear descriptions for mode tooltips
        mode_descriptions = {
            "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
• Best for: Gaming reactions, music highlights, shouting moments
• Finds: High-volume audio segments above the threshold
• Ideal when: Your video has clear volume differences
• Tip: Adjust threshold if too many/few moments found""",

            "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
• Best for: Movie trailers, montages, location changes
• Finds: Major visual shifts between frames
• Ideal when: Video has multiple scenes or camera angles
• Tip: Great for content with quick cuts or transitions""",

            "🏃 Motion Intensity": """Analyzes movement and action within video frames.
• Best for: Sports highlights, dance videos, action scenes
• Finds: High-movement moments with lots of visual activity
• Ideal when: Video contains physical action or movement
• Tip: Perfect for extracting the most dynamic moments""",

            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
• Best for: Reactions, reviews, commentary, tutorials
• Finds: Words like 'wow', 'amazing', exclamations, excited tone
• Ideal when: Video has spoken content with emotional moments
• Tip: Captures the most engaging verbal reactions""",

            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
• Best for: Music videos, sound effect moments, beat drops
• Finds: Sharp increases in audio frequency or volume
• Ideal when: Video has musical elements or sound effects
• Tip: Great for rhythm-based or audio-driven content""",

            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
• Best for: Any video type, general content, unsure what to use
• Finds: Moments scoring high across multiple analysis methods
• Ideal when: You want the most 'interesting' overall moments
• Tip: Recommended starting point for most videos"""
        }

        # Create tooltip for the dropdown (updates when selection changes)
        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)

        # Update tooltip when selection changes
        def on_detection_change(event):
            selection = detection_dropdown.get()
            mode_map = {
                "🔊 Loud Moments": "loud",
                "🎬 Scene Changes": "scene",
                "🏃 Motion Intensity": "motion",
                "😄 Emotional Speech": "speech",
                "🎵 Audio Peaks": "peaks",
                "🎯 Smart Combined": "combined"
            }
            self.detection_mode_var.set(mode_map.get(selection, "loud"))
            # Update tooltip text for the selected mode
            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
            # Show/hide threshold setting based on mode
            if selection == "🔊 Loud Moments":
                threshold_frame.pack(fill="x", pady=5)
            else:
                threshold_frame.pack_forget()

        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)

        # Audio threshold (only shown for loud moments)
        threshold_frame = tk.Frame(settings_frame)
        threshold_frame.pack(fill="x", pady=5)
        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
        threshold_label.pack(side="left")
        self.threshold_var = tk.IntVar(value=-30)
        threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
        threshold_spinbox.pack(side="right")

        # Add tooltip for threshold setting
        threshold_tooltip_text = """Audio Threshold Control:
• Higher values (closer to 0): Only very loud moments
• Lower values (closer to -50): More moments detected
• Default -30 dB: Good balance for most videos
• Adjust based on your video's audio levels

Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')

        # Clip duration (increased to 120 seconds max)
        duration_frame = tk.Frame(settings_frame)
        duration_frame.pack(fill="x", pady=5)
        duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
        duration_label.pack(side="left")
        self.duration_var = tk.IntVar(value=5)
        duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
        duration_spinbox.pack(side="right")

        # Add tooltip for duration setting
        duration_tooltip_text = """Clip Duration Setting:
• 3-10 seconds: Perfect for TikTok/Instagram Reels
• 10-30 seconds: Good for YouTube Shorts
• 30-60 seconds: Longer form highlights
• 60+ seconds: Extended content clips

Shorter clips = more viral potential
Longer clips = more context and story"""
        ToolTip(duration_spinbox, duration_tooltip_text, side='right')

        # Preview button
        self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips", command=self.preview_clips,
                                     bg="#2196F3", fg="white", font=("Arial", 10, "bold"), pady=5)
        self.preview_btn.pack(pady=5)

        # Add tooltip for preview button
        preview_tooltip_text = """Preview Clips Feature:
• Analyzes your video using the selected detection mode
• Shows all detected moments with timestamps
• Lets you select specific clips to generate
• No video files created - just analysis
• Great for testing settings before full generation

Tip: Always preview first to see what the AI finds!"""
        ToolTip(self.preview_btn, preview_tooltip_text, side='right')

        # Generate button
        self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts", command=self.start_generation,
                                      bg="#4CAF50", fg="white", font=("Arial", 12, "bold"), pady=10)
        self.generate_btn.pack(pady=10)

        # Add tooltip for generate button
        generate_tooltip_text = """Generate Shorts Feature:
• Creates actual video files from detected moments
• Adds AI-generated subtitles to each clip
• Formats videos for vertical social media (1080x1920)
• Saves clips to your selected output folder
• Takes longer but creates ready-to-post content

Tip: Use Preview first to fine-tune your settings!"""
        ToolTip(self.generate_btn, generate_tooltip_text, side='right')

        # Progress frame
        progress_frame = tk.Frame(self.root)
        progress_frame.pack(pady=5, padx=20, fill="x")
        self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
        self.progress_label.pack()
        self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
        self.progress_bar.pack(pady=3)

    def select_video(self):
        file_path = filedialog.askopenfilename(
            title="Select Video File",
            filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.wmv")]
        )
        if file_path:
            self.video_path = file_path
            self.video_label.config(text=os.path.basename(file_path))

    def select_output_folder(self):
        folder_path = filedialog.askdirectory(title="Select Output Folder")
        if folder_path:
            self.output_folder = folder_path
            self.output_label.config(text=folder_path)

    def preview_clips(self):
        if not self.video_path:
            messagebox.showwarning("Warning", "Please select a video file first!")
            return
        try:
            # Validate video first
            validate_video(self.video_path, min_duration=self.duration_var.get() * 2)

            # Analyze using selected detection mode
            self.preview_btn.config(state="disabled", text="Analyzing...")
            self.root.update()

            detection_mode = self.detection_mode_var.get()
            if detection_mode == "loud":
                moments = detect_loud_moments(
                    self.video_path,
                    chunk_duration=self.duration_var.get(),
                    threshold_db=self.threshold_var.get()
                )
                mode_name = "loud moments"
            elif detection_mode == "scene":
                moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "scene changes"
            elif detection_mode == "motion":
                moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "motion moments"
            elif detection_mode == "speech":
                moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "emotional speech"
            elif detection_mode == "peaks":
                moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "audio peaks"
            elif detection_mode == "combined":
                moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
                mode_name = "interesting moments"
            else:
                moments = detect_loud_moments(
                    self.video_path,
                    chunk_duration=self.duration_var.get(),
                    threshold_db=self.threshold_var.get()
                )
                mode_name = "loud moments"

            if not moments:
                messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
                return

            # Show preview window
            preview_window = tk.Toplevel(self.root)
            preview_window.title("Preview and Select Clips")
            preview_window.geometry("500x400")
            tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:",
                     font=("Arial", 12, "bold")).pack(pady=10)

            # Create scrollable frame for checkboxes
            canvas = tk.Canvas(preview_window)
            scrollbar = tk.Scrollbar(preview_window, orient="vertical", command=canvas.yview)
            scrollable_frame = tk.Frame(canvas)
            scrollable_frame.bind(
                "<Configure>",
                lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
            )
            canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
            canvas.configure(yscrollcommand=scrollbar.set)

            # Store checkbox variables and clip data
            self.clip_vars = []
            # Use all clips if max clips is disabled, otherwise limit by setting
            clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
            self.preview_clips_data = clips_to_show

            # Add selectable clips with checkboxes
            for i, (start, end) in enumerate(self.preview_clips_data, 1):
                duration = end - start
                time_str = f"Clip {i}: {start//60:02.0f}:{start%60:05.2f} - {end//60:02.0f}:{end%60:05.2f} ({duration:.1f}s)"
                clip_var = tk.BooleanVar(value=True)  # Default selected
                self.clip_vars.append(clip_var)
                clip_frame = tk.Frame(scrollable_frame)
                clip_frame.pack(fill="x", padx=10, pady=2)
                checkbox = tk.Checkbutton(clip_frame, variable=clip_var, text=time_str,
                                          font=("Courier", 10), anchor="w")
                checkbox.pack(fill="x")

            canvas.pack(side="left", fill="both", expand=True, padx=10, pady=5)
            scrollbar.pack(side="right", fill="y")

            # Button frame
            button_frame = tk.Frame(preview_window)
            button_frame.pack(fill="x", padx=10, pady=10)

            # Select/Deselect all buttons
            control_frame = tk.Frame(button_frame)
            control_frame.pack(fill="x", pady=5)
            tk.Button(control_frame, text="Select All",
                      command=lambda: [var.set(True) for var in self.clip_vars]).pack(side="left", padx=5)
            tk.Button(control_frame, text="Deselect All",
                      command=lambda: [var.set(False) for var in self.clip_vars]).pack(side="left", padx=5)

            # Generate selected clips button (fixed size for full text visibility)
            generate_selected_btn = tk.Button(button_frame, text="🎬 Generate Selected Clips",
                                              command=lambda: self.generate_selected_clips(preview_window),
                                              bg="#4CAF50", fg="white", font=("Arial", 11, "bold"),
                                              pady=8, width=25)
            generate_selected_btn.pack(fill="x", pady=5)

            # Close button
            tk.Button(button_frame, text="Close", command=preview_window.destroy).pack(pady=5)

        except Exception as e:
            messagebox.showerror("Preview Error", f"Error analyzing video: {str(e)}")
        finally:
            self.preview_btn.config(state="normal", text="🔍 Preview Clips")

    def generate_selected_clips(self, preview_window):
        """Generate only the selected clips from preview"""
        try:
            # Get selected clips
            selected_clips = []
            for i, (clip_var, clip_data) in enumerate(zip(self.clip_vars, self.preview_clips_data)):
                if clip_var.get():
                    selected_clips.append((i + 1, clip_data))  # (clip_number, (start, end))

            if not selected_clips:
                messagebox.showwarning("Warning", "Please select at least one clip to generate!")
                return

            # Close preview window
            preview_window.destroy()

            # Show confirmation
            clip_count = len(selected_clips)
            clip_numbers = [str(num) for num, _ in selected_clips]
            confirm_msg = f"Generate {clip_count} selected clips (#{', #'.join(clip_numbers)})?"
            if not messagebox.askyesno("Confirm Generation", confirm_msg):
                return

            # Start generation in background thread
            self.selected_clips_data = [clip_data for _, clip_data in selected_clips]
            self.generate_btn.config(state="disabled", text="Generating Selected...")
            thread = threading.Thread(target=self.selected_generation_worker)
            thread.daemon = True
            thread.start()
        except Exception as e:
            messagebox.showerror("Generation Error", f"Error starting generation: {str(e)}")

    def selected_generation_worker(self):
        """Generate only selected clips"""
        try:
            # Check available disk space
            import shutil
            free_space_gb = shutil.disk_usage(self.output_folder)[2] / (1024**3)
            if free_space_gb < 1:
                raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

            # Validate video first
            try:
                video_duration = validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
                self.update_progress(f"✅ Video validated ({video_duration:.1f}s)", 5)
            except Exception as e:
                self.update_progress("❌ Video validation failed", 0)
                raise e

            os.makedirs(self.output_folder, exist_ok=True)
            selected_count = len(self.selected_clips_data)
            self.update_progress(f"📊 Generating {selected_count} selected clips", 10)

            for i, (start, end) in enumerate(self.selected_clips_data):
                self.update_progress(f"🗣️ Transcribing clip {i+1}/{selected_count}", 20 + (i * 30))
                subtitles = transcribe_and_extract_subtitles(self.video_path, start, end)
                out_path = os.path.join(self.output_folder, f"short_{i+1}.mp4")
                self.update_progress(f"🎬 Creating video {i+1}/{selected_count}", 40 + (i * 30))
                create_short_clip(self.video_path, start, end, subtitles, out_path)

            self.update_progress("✅ Selected clips generated successfully!", 100)
            messagebox.showinfo("Success",
                                f"Successfully generated {selected_count} selected clips in '{self.output_folder}' folder!")
        except FileNotFoundError as e:
            messagebox.showerror("File Error", str(e))
        except ValueError as e:
            messagebox.showerror("Video Error", str(e))
        except RuntimeError as e:
            messagebox.showerror("System Error", str(e))
        except Exception as e:
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
        finally:
            self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
            self.progress_bar["value"] = 0
            self.progress_label.config(text="Ready to generate shorts")

    def update_progress(self, message, percent):
        self.progress_label.config(text=message)
        self.progress_bar["value"] = percent
        self.root.update()

    def generation_worker(self):
        try:
            # Check available disk space
            import shutil
            free_space_gb = shutil.disk_usage(self.output_folder)[2] / (1024**3)
            if free_space_gb < 1:
                raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

            generate_shorts(
                self.video_path,
                max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,  # Default cap when the max-clips limit is unchecked
                output_folder=self.output_folder,
                progress_callback=self.update_progress,
                threshold_db=self.threshold_var.get(),
                clip_duration=self.duration_var.get(),
                detection_mode=self.detection_mode_var.get()
            )
            messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
        except FileNotFoundError as e:
            messagebox.showerror("File Error", str(e))
        except ValueError as e:
            messagebox.showerror("Video Error", str(e))
        except RuntimeError as e:
            messagebox.showerror("System Error", str(e))
        except Exception as e:
            messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
        finally:
            self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
            self.progress_bar["value"] = 0
            self.progress_label.config(text="Ready to generate shorts")

    def start_generation(self):
        if not self.video_path:
            messagebox.showwarning("Warning", "Please select a video file first!")
            return
        self.generate_btn.config(state="disabled", text="Generating...")
        thread = threading.Thread(target=self.generation_worker)
        thread.daemon = True
        thread.start()


def run_gui():
    root = tk.Tk()
    app = ShortsGeneratorGUI(root)
    root.mainloop()


if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
        # Run command line mode
        try:
            generate_shorts(sys.argv[1])
            print("✅ Shorts generation completed successfully!")
        except Exception as e:
            print(f"❌ Error: {str(e)}")
    else:
        # Run GUI mode (default)
        run_gui()
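
# Example invocations (a sketch; "shorts_generator.py" stands in for whatever this file is saved as):
#   python shorts_generator.py              -> launch the Tkinter GUI (default)
#   python shorts_generator.py --gui        -> also launches the GUI
#   python shorts_generator.py input.mp4    -> command-line mode with defaults: detect loud moments
#                                              in input.mp4 and write up to 3 clips (short_1.mp4, ...)
#                                              into the "shorts/" folder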