Update requirements and enhance shorts generator with advanced detection modes and tooltips

klop51 2025-08-09 15:45:24 +02:00
parent 491040b148
commit bd55be0448
3 changed files with 601 additions and 36 deletions

View File

@@ -1,3 +1,6 @@
moviepy>=1.0.3
faster-whisper>=0.10.0
numpy>=1.21.0
opencv-python>=4.5.0
scipy>=1.7.0
librosa>=0.9.0
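# opencv-python and scipy back the new scene/motion/peak detectors;
# librosa is also imported by shorts_generator2.py.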

View File

@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa
class ToolTip:
    """Create a tooltip for a given widget"""
    def __init__(self, widget, text='widget info', side='right'):
        self.widget = widget
        self.text = text
        self.side = side
        self.widget.bind("<Enter>", self.enter)
        self.widget.bind("<Leave>", self.leave)
        self.tipwindow = None

    def enter(self, event=None):
        self.showtip()

    def leave(self, event=None):
        self.hidetip()

    def showtip(self):
        if self.tipwindow or not self.text:
            return
        # Get widget position
        x = self.widget.winfo_rootx()
        y = self.widget.winfo_rooty()
        w = self.widget.winfo_width()
        h = self.widget.winfo_height()

        # Position tooltip based on side preference
        if self.side == 'right':
            x = x + w + 10  # 10px to the right of the widget
            y = y
        else:
            x = x + 25
            y = y + h + 5

        self.tipwindow = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)
        tw.wm_geometry("+%d+%d" % (x, y))
        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
                         background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                         font=("Arial", "9", "normal"), wraplength=350)
        label.pack(ipadx=5, ipady=3)

    def hidetip(self):
        tw = self.tipwindow
        self.tipwindow = None
        if tw:
            tw.destroy()
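# Usage sketch (hypothetical widget names):
#   btn = tk.Button(root, text="Export")
#   ToolTip(btn, "Render the selected clips", side='right')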
class ComboboxTooltip:
    """Special tooltip for combobox that shows on listbox hover"""
    def __init__(self, combobox, descriptions):
        self.combobox = combobox
        self.descriptions = descriptions
        self.tipwindow = None
        self.bound_listbox = None
        # Bind to combobox events
        self.combobox.bind("<Button-1>", self.on_click)
        self.combobox.bind("<KeyPress>", self.on_keypress)

    def on_click(self, event):
        # Try to find the listbox when the dropdown opens
        self.combobox.after(50, self.bind_listbox)

    def on_keypress(self, event):
        # Handle keyboard navigation
        self.combobox.after(50, self.bind_listbox)

    def bind_listbox(self):
        # Find the listbox widget more reliably
        try:
            # Look through all toplevel windows for the combobox popdown
            for window in self.combobox.winfo_toplevel().winfo_children():
                window_class = window.winfo_class()
                if window_class == 'Toplevel':
                    # Found a toplevel, look for a listbox inside
                    for child in window.winfo_children():
                        if child.winfo_class() == 'Listbox':
                            if self.bound_listbox != child:
                                self.bound_listbox = child
                                child.bind("<Motion>", self.on_listbox_motion)
                                child.bind("<Leave>", self.on_listbox_leave)
                                child.bind("<ButtonRelease-1>", self.on_listbox_leave)
                            return
        except Exception:
            # Fallback method - try to find any listbox
            try:
                # Alternative approach: look for the popdown frame
                for child in self.combobox.tk.call('winfo', 'children', '.'):
                    if 'popdown' in str(child):
                        popdown = self.combobox.nametowidget(child)
                        for subchild in popdown.winfo_children():
                            if subchild.winfo_class() == 'Listbox':
                                if self.bound_listbox != subchild:
                                    self.bound_listbox = subchild
                                    subchild.bind("<Motion>", self.on_listbox_motion)
                                    subchild.bind("<Leave>", self.on_listbox_leave)
                                    subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
                                return
            except Exception:
                pass

    def on_listbox_motion(self, event):
        try:
            listbox = event.widget
            index = listbox.nearest(event.y)
            if 0 <= index < len(self.combobox['values']):
                selection = self.combobox['values'][index]
                if selection in self.descriptions:
                    self.show_tooltip(event, self.descriptions[selection])
        except Exception:
            pass

    def on_listbox_leave(self, event):
        self.hide_tooltip()

    def show_tooltip(self, event, text):
        self.hide_tooltip()  # Hide any existing tooltip
        try:
            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
            y = event.widget.winfo_rooty() + event.y - 20

            self.tipwindow = tw = tk.Toplevel(event.widget)
            tw.wm_overrideredirect(True)
            tw.wm_geometry("+%d+%d" % (x, y))
            label = tk.Label(tw, text=text, justify=tk.LEFT,
                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                             font=("Arial", "9", "normal"), wraplength=350)
            label.pack(ipadx=5, ipady=3)
        except Exception:
            pass

    def hide_tooltip(self):
        if self.tipwindow:
            try:
                self.tipwindow.destroy()
            except Exception:
                pass
            self.tipwindow = None
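# Usage sketch (hypothetical values): give each dropdown entry its own hover text:
#   combo = ttk.Combobox(root, values=["Fast", "Accurate"], state="readonly")
#   ComboboxTooltip(combo, {"Fast": "Lower quality, quick", "Accurate": "Slower, better"})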
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    clip.close()
    return loud_chunks
def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
    """Detect dramatic visual scene changes"""
    print("🎬 Analyzing scene changes...")
    clip = VideoFileClip(video_path)

    # Sample frames at regular intervals
    sample_rate = 2  # Check every 2 seconds
    times = np.arange(0, clip.duration, sample_rate)
    scene_changes = []

    for i, t in enumerate(times[:-1]):
        try:
            # Get current and next frame
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(times[i + 1])

            # Convert to grayscale and resize for faster processing
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
            gray2 = cv2.resize(gray2, (160, 90))

            # Mean absolute pixel difference, normalized to 0-1
            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0

            if diff > threshold:
                start = max(0, t - chunk_duration / 2)
                end = min(clip.duration, t + chunk_duration / 2)
                scene_changes.append((start, end))
        except Exception as e:
            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
            continue

    print(f"🎬 Found {len(scene_changes)} scene changes")
    clip.close()
    return scene_changes
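# Note: mean absolute pixel difference is a cheap stand-in for real
# shot-boundary detection; the 0.3 default means sampled frames must differ
# by about 30% of full brightness on average to count as a cut.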
def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
    """Detect high motion/action moments"""
    print("🏃 Analyzing motion intensity...")
    clip = VideoFileClip(video_path)
    sample_rate = 1  # Check every second
    times = np.arange(0, clip.duration - 1, sample_rate)
    motion_moments = []

    for t in times:
        try:
            # Get two frames half a second apart
            frame1 = clip.get_frame(t)
            frame2 = clip.get_frame(t + 0.5)

            # Convert to grayscale and resize
            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
            gray1 = cv2.resize(gray1, (160, 90))
            gray2 = cv2.resize(gray2, (160, 90))

            # Track corner features between the frames (Lucas-Kanade optical
            # flow) and measure their average displacement in pixels
            points = cv2.goodFeaturesToTrack(gray1, maxCorners=100,
                                             qualityLevel=0.3, minDistance=7)
            if points is None:
                continue
            new_points, status, _ = cv2.calcOpticalFlowPyrLK(gray1, gray2, points, None)
            if new_points is not None and status is not None:
                tracked = status.flatten() == 1
                if not np.any(tracked):
                    continue
                displacement = (new_points - points)[tracked]
                motion_magnitude = np.mean(np.linalg.norm(displacement.reshape(-1, 2), axis=1))
                if motion_magnitude > threshold:
                    start = max(0, t - chunk_duration / 2)
                    end = min(clip.duration, t + chunk_duration / 2)
                    motion_moments.append((start, end))
        except Exception as e:
            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
            continue

    print(f"🏃 Found {len(motion_moments)} high-motion moments")
    clip.close()
    return motion_moments
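# Note: the threshold is average feature displacement in pixels on the
# 160x90 downscaled frames; values below ~1 px sit near the tracker's noise
# floor, so raise it if nearly every second registers as motion.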
def detect_speech_emotion(video_path, chunk_duration=5):
    """Detect emotional/excited speech patterns"""
    print("😄 Analyzing speech emotions...")

    # Use Whisper to get detailed speech analysis
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)

    emotional_moments = []
    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
                           'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']

    for segment in segments:
        text = segment.text.lower()

        # Check for excitement keywords
        has_keywords = any(keyword in text for keyword in excitement_keywords)

        # Check for multiple exclamation-worthy patterns
        has_caps = any(word.isupper() for word in segment.text.split())
        has_punctuation = '!' in segment.text or '?' in segment.text
        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)

        if has_keywords or has_punctuation or is_short_excited:
            start = max(0, segment.start - chunk_duration / 2)
            end = segment.end + chunk_duration / 2  # pad the tail of the segment
            emotional_moments.append((start, end))

    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
    return emotional_moments
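# Worked example: a segment like "No way, that was AMAZING!" matches on a
# keyword ('no way'), punctuation ('!') and capitalization, so the clip
# window around it is kept.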
def detect_audio_peaks(video_path, chunk_duration=5):
    """Detect sudden audio peaks (bass drops, beats, impacts)"""
    print("🎵 Analyzing audio peaks...")
    clip = VideoFileClip(video_path)
    if clip.audio is None:
        clip.close()
        return []
    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed

    # Convert to mono if stereo
    if len(audio.shape) > 1:
        audio = np.mean(audio, axis=1)

    # Keep only amplitude peaks in the loudest ~5% of samples
    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))

    peak_moments = []
    prev_peak = 0
    for peak in peaks:
        peak_time = peak / 22050
        # Skip peaks that are too close to the previous accepted one
        if peak_time - prev_peak > chunk_duration:
            start = max(0, peak_time - chunk_duration / 2)
            end = min(clip.duration, peak_time + chunk_duration / 2)
            peak_moments.append((start, end))
            prev_peak = peak_time

    print(f"🎵 Found {len(peak_moments)} audio peak moments")
    clip.close()
    return peak_moments
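# Note: with height at the 95th percentile, find_peaks returns many raw
# candidates; the chunk_duration spacing rule above keeps roughly one
# moment per burst of loud audio.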
def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
    """Combine multiple detection methods for best moments"""
    print("🎯 Running comprehensive moment analysis...")
    if weights is None:
        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

    # Get all detection results
    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
    scene_moments = detect_scene_changes(video_path, chunk_duration)
    motion_moments = detect_motion_intensity(video_path, chunk_duration)
    speech_moments = detect_speech_emotion(video_path, chunk_duration)
    peak_moments = detect_audio_peaks(video_path, chunk_duration)

    # Create time-based scoring
    clip = VideoFileClip(video_path)
    duration = clip.duration
    clip.close()

    # Score each second of the video
    time_scores = {}
    for moments, weight in [(loud_moments, weights['loud']),
                            (scene_moments, weights['scene']),
                            (motion_moments, weights['motion']),
                            (speech_moments, weights['speech']),
                            (peak_moments, weights['peaks'])]:
        for start, end in moments:
            for t in range(int(start), int(end) + 1):
                if t not in time_scores:
                    time_scores[t] = 0
                time_scores[t] += weight

    # Find the highest scoring segments
    if not time_scores:
        return loud_moments  # Fallback to loud moments

    # Get top scoring time periods
    sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
    combined_moments = []
    used_times = set()

    for time_sec, score in sorted_times:
        if time_sec not in used_times and score > 0.3:  # Minimum combined score
            start = max(0, time_sec - chunk_duration / 2)
            end = min(duration, time_sec + chunk_duration / 2)
            combined_moments.append((start, end))
            # Mark nearby times as used to avoid overlap
            for t in range(max(0, time_sec - chunk_duration),
                           min(int(duration), time_sec + chunk_duration)):
                used_times.add(t)

    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
    return combined_moments
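# Worked example with the default weights: a second flagged by both the
# loud (0.3) and motion (0.2) detectors scores 0.5 and clears the 0.3
# cutoff; a second flagged only by audio peaks (0.1) is discarded.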
def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
@@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30):
    else:
        raise ValueError(f"Error reading video: {str(e)}")
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
                    threshold_db=-30, clip_duration=5, detection_mode="loud"):
    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
@@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
progress_callback(f"❌ Video validation failed", 0)
raise e
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
# Choose detection method based on mode
if detection_mode == "loud":
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
elif detection_mode == "scene":
if progress_callback:
progress_callback("🎬 Analyzing scene changes...", 10)
best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
elif detection_mode == "motion":
if progress_callback:
progress_callback("🏃 Analyzing motion intensity...", 10)
best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
elif detection_mode == "speech":
if progress_callback:
progress_callback("😄 Analyzing speech emotions...", 10)
best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
elif detection_mode == "peaks":
if progress_callback:
progress_callback("🎵 Analyzing audio peaks...", 10)
best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
elif detection_mode == "combined":
if progress_callback:
progress_callback("🎯 Running comprehensive analysis...", 10)
best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
else:
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
    selected = best_moments[:max_clips]

    if not selected:
        mode_name = {
            "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
            "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
        }.get(detection_mode, "moments")
        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")

    if progress_callback:
        progress_callback(f"📊 Found {len(selected)} clips to generate", 20)
@@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
class ShortsGeneratorGUI:
    def __init__(self, root):
        self.root = root
        self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
        self.root.geometry("650x650")  # Reduced height to eliminate empty space
        self.root.minsize(600, 600)  # Reduced minimum size

        self.video_path = None
        self.output_folder = "shorts"
@@ -265,43 +641,202 @@ class ShortsGeneratorGUI:
        self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
        clips_checkbox.config(command=toggle_clips_limit)

        # Add tooltip for max clips setting
        clips_tooltip_text = """Max Clips Control:
Checked: Limit the number of clips generated
Unchecked: Generate all detected moments
1-3 clips: Quick highlights for social media
4-6 clips: Good variety pack
7-10 clips: Comprehensive highlight reel
Tip: Start with 3 clips, then increase if you want more content"""
        ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
        ToolTip(clips_checkbox, clips_tooltip_text, side='right')
        # Detection Mode Selection
        detection_frame = tk.Frame(settings_frame)
        detection_frame.pack(fill="x", pady=5)
        tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")

        self.detection_mode_var = tk.StringVar(value="loud")
        self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
        detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
                                          values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
                                                  "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
                                          state="readonly", width=22)
        detection_dropdown.pack(side="right")

        # Store the mapping between display text and internal values
        self.mode_mapping = {
            "🔊 Loud Moments": "loud",
            "🎬 Scene Changes": "scene",
            "🏃 Motion Intensity": "motion",
            "😄 Emotional Speech": "speech",
            "🎵 Audio Peaks": "peaks",
            "🎯 Smart Combined": "combined"
        }
        # Simple, clear descriptions for mode tooltips
        mode_descriptions = {
            "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
Best for: Gaming reactions, music highlights, shouting moments
Finds: High-volume audio segments above the threshold
Ideal when: Your video has clear volume differences
Tip: Adjust threshold if too many/few moments found""",
            "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
Best for: Movie trailers, montages, location changes
Finds: Major visual shifts between frames
Ideal when: Video has multiple scenes or camera angles
Tip: Great for content with quick cuts or transitions""",
            "🏃 Motion Intensity": """Analyzes movement and action within video frames.
Best for: Sports highlights, dance videos, action scenes
Finds: High-movement moments with lots of visual activity
Ideal when: Video contains physical action or movement
Tip: Perfect for extracting the most dynamic moments""",
            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
Best for: Reactions, reviews, commentary, tutorials
Finds: Words like 'wow', 'amazing', exclamations, excited tone
Ideal when: Video has spoken content with emotional moments
Tip: Captures the most engaging verbal reactions""",
            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
Best for: Music videos, sound effect moments, beat drops
Finds: Sharp increases in audio frequency or volume
Ideal when: Video has musical elements or sound effects
Tip: Great for rhythm-based or audio-driven content""",
            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
Best for: Any video type, general content, unsure what to use
Finds: Moments scoring high across multiple analysis methods
Ideal when: You want the most 'interesting' overall moments
Tip: Recommended starting point for most videos"""
        }
        # Create tooltip for the dropdown (updates when selection changes)
        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)

        # Update tooltip when selection changes
        def on_detection_change(event):
            selection = detection_dropdown.get()
            # Reuse the display-text to internal-value mapping defined above
            self.detection_mode_var.set(self.mode_mapping.get(selection, "loud"))
            # Update tooltip text for the selected mode
            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
            # Show/hide threshold setting based on mode
            if selection == "🔊 Loud Moments":
                threshold_frame.pack(fill="x", pady=5)
            else:
                threshold_frame.pack_forget()

        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
        # Audio threshold (only shown for loud moments)
        threshold_frame = tk.Frame(settings_frame)
        threshold_frame.pack(fill="x", pady=5)
        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
        threshold_label.pack(side="left")
        self.threshold_var = tk.IntVar(value=-30)
        threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
        threshold_spinbox.pack(side="right")

        # Add tooltip for threshold setting
        threshold_tooltip_text = """Audio Threshold Control:
Higher values (closer to 0): Only very loud moments
Lower values (closer to -50): More moments detected
Default -30 dB: Good balance for most videos
Adjust based on your video's audio levels
Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
        # Clip duration (increased to 120 seconds max)
        duration_frame = tk.Frame(settings_frame)
        duration_frame.pack(fill="x", pady=5)
        duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
        duration_label.pack(side="left")
        self.duration_var = tk.IntVar(value=5)
        duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
        duration_spinbox.pack(side="right")

        # Add tooltip for duration setting
        duration_tooltip_text = """Clip Duration Setting:
3-10 seconds: Perfect for TikTok/Instagram Reels
10-30 seconds: Good for YouTube Shorts
30-60 seconds: Longer form highlights
60+ seconds: Extended content clips
Shorter clips = more viral potential
Longer clips = more context and story"""
        ToolTip(duration_spinbox, duration_tooltip_text, side='right')
        # Preview button
        self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                     command=self.preview_clips, bg="#2196F3", fg="white",
                                     font=("Arial", 10, "bold"), pady=5)
        self.preview_btn.pack(pady=5)

        # Add tooltip for preview button
        preview_tooltip_text = """Preview Clips Feature:
Analyzes your video using the selected detection mode
Shows all detected moments with timestamps
Lets you select specific clips to generate
No video files created - just analysis
Great for testing settings before full generation
Tip: Always preview first to see what the AI finds!"""
        ToolTip(self.preview_btn, preview_tooltip_text, side='right')
        # Generate button
        self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                      command=self.start_generation, bg="#4CAF50", fg="white",
                                      font=("Arial", 12, "bold"), pady=10)
        self.generate_btn.pack(pady=10)

        # Add tooltip for generate button
        generate_tooltip_text = """Generate Shorts Feature:
Creates actual video files from detected moments
Adds AI-generated subtitles to each clip
Formats videos for vertical social media (1080x1920)
Saves clips to your selected output folder
Takes longer but creates ready-to-post content
Tip: Use Preview first to fine-tune your settings!"""
        ToolTip(self.generate_btn, generate_tooltip_text, side='right')
        # Progress frame
        progress_frame = tk.Frame(self.root)
        progress_frame.pack(pady=5, padx=20, fill="x")

        self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
        self.progress_label.pack()

        self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
        self.progress_bar.pack(pady=3)
    def select_video(self):
        file_path = filedialog.askopenfilename(
@@ -327,18 +862,44 @@ class ShortsGeneratorGUI:
        # Validate video first
        validate_video(self.video_path, min_duration=self.duration_var.get() * 2)

        # Analyze using selected detection mode
        self.preview_btn.config(state="disabled", text="Analyzing...")
        self.root.update()

        detection_mode = self.detection_mode_var.get()

        if detection_mode == "loud":
            moments = detect_loud_moments(
                self.video_path,
                chunk_duration=self.duration_var.get(),
                threshold_db=self.threshold_var.get()
            )
            mode_name = "loud moments"
        elif detection_mode == "scene":
            moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "scene changes"
        elif detection_mode == "motion":
            moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "motion moments"
        elif detection_mode == "speech":
            moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "emotional speech"
        elif detection_mode == "peaks":
            moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "audio peaks"
        elif detection_mode == "combined":
            moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
            mode_name = "interesting moments"
        else:
            moments = detect_loud_moments(
                self.video_path,
                chunk_duration=self.duration_var.get(),
                threshold_db=self.threshold_var.get()
            )
            mode_name = "loud moments"

        if not moments:
            messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
            return

        # Show preview window
@@ -346,7 +907,7 @@ class ShortsGeneratorGUI:
        preview_window.title("Preview and Select Clips")
        preview_window.geometry("500x400")

        tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)

        # Create scrollable frame for checkboxes
        canvas = tk.Canvas(preview_window)
@@ -364,7 +925,7 @@ class ShortsGeneratorGUI:
        # Store checkbox variables and clip data
        self.clip_vars = []
        # Use all clips if max clips is disabled, otherwise limit by setting
        clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
        self.preview_clips_data = clips_to_show

        # Add selectable clips with checkboxes
@@ -510,11 +1071,12 @@ class ShortsGeneratorGUI:
            generate_shorts(
                self.video_path,
                max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,  # Default cap when the limit is unchecked
                output_folder=self.output_folder,
                progress_callback=self.update_progress,
                threshold_db=self.threshold_var.get(),
                clip_duration=self.duration_var.get(),
                detection_mode=self.detection_mode_var.get()
            )
            messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
        except FileNotFoundError as e:
@@ -547,18 +1109,14 @@ def run_gui():
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
        # Run command line mode
        try:
            generate_shorts(sys.argv[1])
            print("✅ Shorts generation completed successfully!")
        except Exception as e:
            print(f"❌ Error: {str(e)}")
    else:
        # Run GUI mode (default)
        run_gui()
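# Invocation sketch:
#   python shorts_generator2.py your_video.mp4   # command-line generation
#   python shorts_generator2.py --gui            # explicit GUI
#   python shorts_generator2.py                  # no args also launches the GUI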

View File

@@ -1,4 +1,8 @@
1
00:00:00,000 --> 00:00:01,280
Yeah! Yeah!

2
00:00:06,000 --> 00:00:07,809
FLAVOR CHEESE 24!