Update requirements and enhance shorts generator with advanced detection modes and tooltips

This commit is contained in:
klop51 2025-08-09 15:45:24 +02:00
parent 491040b148
commit bd55be0448
3 changed files with 601 additions and 36 deletions

View File

@@ -1,3 +1,6 @@
moviepy>=1.0.3
faster-whisper>=0.10.0
numpy>=1.21.0
opencv-python>=4.5.0
scipy>=1.7.0
librosa>=0.9.0

View File

@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa
class ToolTip:
"""Create a tooltip for a given widget"""
def __init__(self, widget, text='widget info', side='right'):
self.widget = widget
self.text = text
self.side = side
self.widget.bind("<Enter>", self.enter)
self.widget.bind("<Leave>", self.leave)
self.tipwindow = None
def enter(self, event=None):
self.showtip()
def leave(self, event=None):
self.hidetip()
def showtip(self):
if self.tipwindow or not self.text:
return
# Get widget position
x = self.widget.winfo_rootx()
y = self.widget.winfo_rooty()
w = self.widget.winfo_width()
h = self.widget.winfo_height()
# Position tooltip based on side preference
if self.side == 'right':
x = x + w + 10 # 10px to the right of widget
y = y
else:
x = x + 25
y = y + h + 5
self.tipwindow = tw = tk.Toplevel(self.widget)
tw.wm_overrideredirect(True)
tw.wm_geometry("+%d+%d" % (x, y))
label = tk.Label(tw, text=self.text, justify=tk.LEFT,
background="#ffffe0", relief=tk.SOLID, borderwidth=1,
font=("Arial", "9", "normal"), wraplength=350)
label.pack(ipadx=5, ipady=3)
def hidetip(self):
tw = self.tipwindow
self.tipwindow = None
if tw:
tw.destroy()
class ComboboxTooltip:
"""Special tooltip for combobox that shows on listbox hover"""
def __init__(self, combobox, descriptions):
self.combobox = combobox
self.descriptions = descriptions
self.tipwindow = None
self.bound_listbox = None
# Bind to combobox events
self.combobox.bind("<Button-1>", self.on_click)
self.combobox.bind("<KeyPress>", self.on_keypress)
def on_click(self, event):
# Try to find the listbox when dropdown opens
self.combobox.after(50, self.bind_listbox)
def on_keypress(self, event):
# Handle keyboard navigation
self.combobox.after(50, self.bind_listbox)
def bind_listbox(self):
# Find the listbox widget more reliably
try:
# Look through all toplevel windows for the combobox popdown
for window in self.combobox.winfo_toplevel().winfo_children():
window_class = window.winfo_class()
if window_class == 'Toplevel':
# Found a toplevel, look for listbox inside
for child in window.winfo_children():
if child.winfo_class() == 'Listbox':
if self.bound_listbox != child:
self.bound_listbox = child
child.bind("<Motion>", self.on_listbox_motion)
child.bind("<Leave>", self.on_listbox_leave)
child.bind("<ButtonRelease-1>", self.on_listbox_leave)
return
except Exception as e:
# Fallback method - try to find any listbox
try:
# Alternative approach: look for the popdown frame
for child in self.combobox.tk.call('winfo', 'children', '.'):
if 'popdown' in str(child):
popdown = self.combobox.nametowidget(child)
for subchild in popdown.winfo_children():
if subchild.winfo_class() == 'Listbox':
if self.bound_listbox != subchild:
self.bound_listbox = subchild
subchild.bind("<Motion>", self.on_listbox_motion)
subchild.bind("<Leave>", self.on_listbox_leave)
subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
return
except:
pass
def on_listbox_motion(self, event):
try:
listbox = event.widget
index = listbox.nearest(event.y)
if 0 <= index < len(self.combobox['values']):
selection = self.combobox['values'][index]
if selection in self.descriptions:
self.show_tooltip(event, self.descriptions[selection])
except Exception:
pass
def on_listbox_leave(self, event):
self.hide_tooltip()
def show_tooltip(self, event, text):
self.hide_tooltip() # Hide any existing tooltip
try:
x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
y = event.widget.winfo_rooty() + event.y - 20
self.tipwindow = tw = tk.Toplevel(event.widget)
tw.wm_overrideredirect(True)
tw.wm_geometry("+%d+%d" % (x, y))
label = tk.Label(tw, text=text, justify=tk.LEFT,
background="#ffffe0", relief=tk.SOLID, borderwidth=1,
font=("Arial", "9", "normal"), wraplength=350)
label.pack(ipadx=5, ipady=3)
except Exception:
pass
def hide_tooltip(self):
if self.tipwindow:
try:
self.tipwindow.destroy()
except:
pass
self.tipwindow = None
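For illustration, a minimal usage sketch of the two tooltip helpers above (the widget values and descriptions are placeholders, not part of the module). The commented lines show the ttk::combobox::PopdownWindow Tcl call, which on most Tk builds exposes the dropdown's internal listbox and could replace the child-scanning in bind_listbox; treat the ".f.l" path as an assumption to verify on your Tk version.

import tkinter as tk
from tkinter import ttk

root = tk.Tk()
combo = ttk.Combobox(root, values=["Loud Moments", "Scene Changes"], state="readonly")
combo.pack(padx=10, pady=10)

ToolTip(combo, "Pick how clips are detected", side='right')       # plain hover tooltip
ComboboxTooltip(combo, {"Loud Moments": "Volume-based detection",
                        "Scene Changes": "Visual-cut detection"})  # per-item dropdown tooltips

# Possible shortcut for bind_listbox (assumed internal Tk path, verify before relying on it):
# popdown = combo.tk.call('ttk::combobox::PopdownWindow', combo)
# listbox = combo.nametowidget(str(popdown) + '.f.l')

root.mainloop()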
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
clip.close()
return loud_chunks
def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
"""Detect dramatic visual scene changes"""
print("🎬 Analyzing scene changes...")
clip = VideoFileClip(video_path)
# Sample frames at regular intervals
sample_rate = 2 # Check every 2 seconds
times = np.arange(0, clip.duration, sample_rate)
scene_changes = []
prev_frame = None
for i, t in enumerate(times[:-1]):
try:
# Get current and next frame
frame1 = clip.get_frame(t)
frame2 = clip.get_frame(times[i + 1])
# Convert to grayscale and resize for faster processing
gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
gray1 = cv2.resize(gray1, (160, 90)) # Small size for speed
gray2 = cv2.resize(gray2, (160, 90))
# Calculate mean absolute pixel difference between frames (0-1 scale)
diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0
if diff > threshold:
start = max(0, t - chunk_duration/2)
end = min(clip.duration, t + chunk_duration/2)
scene_changes.append((start, end))
except Exception as e:
print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
continue
print(f"🎬 Found {len(scene_changes)} scene changes")
clip.close()
return scene_changes
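For reference, a quick sanity check of the difference metric used above: it is a mean absolute pixel difference normalized to 0-1 (not true structural similarity), so two downscaled frames whose gray levels differ by about 80 on average land just over the default 0.3 threshold. The arrays below are synthetic stand-ins, not frames from a real clip.

import numpy as np

frame_a = np.full((90, 160), 100, dtype=np.uint8)  # mid-gray stand-in frame
frame_b = np.full((90, 160), 180, dtype=np.uint8)  # much brighter stand-in frame

diff = np.mean(np.abs(frame_a.astype(float) - frame_b.astype(float))) / 255.0
print(round(diff, 3))  # 0.314 -> above 0.3, so this pair would be recorded as a scene change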
def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
"""Detect high motion/action moments"""
print("🏃 Analyzing motion intensity...")
clip = VideoFileClip(video_path)
sample_rate = 1 # Check every second
times = np.arange(0, clip.duration - 1, sample_rate)
motion_moments = []
for t in times:
try:
# Get two consecutive frames
frame1 = clip.get_frame(t)
frame2 = clip.get_frame(t + 0.5) # Half second later
# Convert to grayscale and resize
gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
gray1 = cv2.resize(gray1, (160, 90))
gray2 = cv2.resize(gray2, (160, 90))
# Calculate optical flow magnitude
flow = cv2.calcOpticalFlowPyrLK(gray1, gray2,
np.random.randint(0, 160, (100, 1, 2)).astype(np.float32),
None)[0]
if flow is not None:
motion_magnitude = np.mean(np.linalg.norm(flow.reshape(-1, 2), axis=1))
if motion_magnitude > threshold:
start = max(0, t - chunk_duration/2)
end = min(clip.duration, t + chunk_duration/2)
motion_moments.append((start, end))
except Exception as e:
print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
continue
print(f"🏃 Found {len(motion_moments)} high-motion moments")
clip.close()
return motion_moments
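One caveat worth noting: cv2.calcOpticalFlowPyrLK is a sparse tracker that expects feature points (usually from cv2.goodFeaturesToTrack) and returns the tracked point positions, so taking the norm of its output measures where the random points sit rather than how far anything moved. A denser alternative, sketched here as an assumption rather than what this module ships, is Farneback flow, whose output is a per-pixel displacement field:

import cv2
import numpy as np

def frame_motion(gray1, gray2):
    """Mean optical-flow magnitude between two same-sized grayscale frames."""
    flow = cv2.calcOpticalFlowFarneback(gray1, gray2, None,
                                        pyr_scale=0.5, levels=3, winsize=15,
                                        iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
    # flow has shape (H, W, 2) holding per-pixel (dx, dy) displacements
    return float(np.mean(np.linalg.norm(flow, axis=2)))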
def detect_speech_emotion(video_path, chunk_duration=5):
"""Detect emotional/excited speech patterns"""
print("😄 Analyzing speech emotions...")
# Use Whisper to get detailed speech analysis
model = WhisperModel("base", device="cpu", compute_type="int8")
segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)
emotional_moments = []
excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']
for segment in segments:
text = segment.text.lower()
# Check for excitement keywords
has_keywords = any(keyword in text for keyword in excitement_keywords)
# Check for multiple exclamation-worthy patterns
has_caps = any(word.isupper() for word in segment.text.split())
has_punctuation = '!' in segment.text or '?' in segment.text
is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)
if has_keywords or has_punctuation or is_short_excited:
start = max(0, segment.start - chunk_duration/2)
end = min(segment.end + chunk_duration/2, segment.end + chunk_duration)
emotional_moments.append((start, end))
print(f"😄 Found {len(emotional_moments)} emotional speech moments")
return emotional_moments
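The speech heuristic above keeps a segment if it contains an excitement keyword, carries ! or ? punctuation, or is a short excited burst; a standalone re-run of just that text check (simplified, using a subset of the keyword list) shows which lines survive:

excitement_keywords = ['wow', 'amazing', 'incredible', 'no way', 'crazy', 'insane', 'yes']

def is_emotional(text):
    lowered = text.lower()
    has_keywords = any(k in lowered for k in excitement_keywords)
    has_punctuation = '!' in text or '?' in text
    is_short_excited = len(lowered.split()) <= 5 and has_keywords
    return has_keywords or has_punctuation or is_short_excited

print(is_emotional("That was absolutely amazing"))     # True  (keyword)
print(is_emotional("Did you see that?"))               # True  (punctuation)
print(is_emotional("We walk to the next checkpoint"))  # False (nothing matches)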
def detect_audio_peaks(video_path, chunk_duration=5):
"""Detect sudden audio peaks (bass drops, beats, impacts)"""
print("🎵 Analyzing audio peaks...")
clip = VideoFileClip(video_path)
audio = clip.audio.to_soundarray(fps=22050) # Lower sample rate for speed
# Convert to mono if stereo
if len(audio.shape) > 1:
audio = np.mean(audio, axis=1)
# Find amplitude peaks (sudden loud samples such as beats or impacts)
peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))
peak_moments = []
prev_peak = 0
for peak in peaks:
peak_time = peak / 22050
# Avoid too close peaks
if peak_time - prev_peak > chunk_duration:
start = max(0, peak_time - chunk_duration/2)
end = min(clip.duration, peak_time + chunk_duration/2)
peak_moments.append((start, end))
prev_peak = peak_time
print(f"🎵 Found {len(peak_moments)} audio peak moments")
clip.close()
return peak_moments
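scipy's find_peaks on the raw waveform flags individual loud samples above the 95th percentile; since librosa is now listed in the requirements, an onset-strength based variant is another option. A sketch only, assuming audio is the mono float array built above at 22050 Hz:

import numpy as np
import librosa

def onset_peak_times(audio, sr=22050, min_gap=5.0):
    """Onset times (in seconds) spaced at least min_gap apart."""
    onsets = librosa.onset.onset_detect(y=audio.astype(np.float32), sr=sr, units='time')
    spaced, prev = [], -min_gap
    for t in onsets:
        if t - prev >= min_gap:
            spaced.append(float(t))
            prev = t
    return spaced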
def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
"""Combine multiple detection methods for best moments"""
print("🎯 Running comprehensive moment analysis...")
if weights is None:
weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}
# Get all detection results
loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5) # Lower threshold
scene_moments = detect_scene_changes(video_path, chunk_duration)
motion_moments = detect_motion_intensity(video_path, chunk_duration)
speech_moments = detect_speech_emotion(video_path, chunk_duration)
peak_moments = detect_audio_peaks(video_path, chunk_duration)
# Create time-based scoring
clip = VideoFileClip(video_path)
duration = clip.duration
clip.close()
# Score each second of the video
time_scores = {}
for moments, weight in [(loud_moments, weights['loud']),
(scene_moments, weights['scene']),
(motion_moments, weights['motion']),
(speech_moments, weights['speech']),
(peak_moments, weights['peaks'])]:
for start, end in moments:
for t in range(int(start), int(end) + 1):
if t not in time_scores:
time_scores[t] = 0
time_scores[t] += weight
# Find the highest scoring segments
if not time_scores:
return loud_moments # Fallback to loud moments
# Get top scoring time periods
sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
combined_moments = []
used_times = set()
for time_sec, score in sorted_times:
if time_sec not in used_times and score > 0.3: # Minimum threshold
start = max(0, time_sec - chunk_duration/2)
end = min(duration, time_sec + chunk_duration/2)
combined_moments.append((start, end))
# Mark nearby times as used to avoid overlap
for t in range(max(0, time_sec - chunk_duration),
min(int(duration), time_sec + chunk_duration)):
used_times.add(t)
print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
return combined_moments
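To make the scoring concrete: each second gets the sum of the weights of every detector whose window covers it, and only seconds scoring above 0.3 become clip candidates. A small worked example with the default weights:

weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}

# Second 42 is covered by the loud, motion and speech windows:
score_42 = weights['loud'] + weights['motion'] + weights['speech']  # 0.7 -> kept (> 0.3)

# Second 97 is covered only by an audio-peak window:
score_97 = weights['peaks']                                         # 0.1 -> discarded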
def transcribe_and_extract_subtitles(video_path, start, end):
print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
model = WhisperModel("base", device="cpu", compute_type="int8")
@@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30):
else:
raise ValueError(f"Error reading video: {str(e)}")
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
threshold_db=-30, clip_duration=5, detection_mode="loud"):
os.makedirs(output_folder, exist_ok=True)
# Validate video first
@@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
progress_callback(f"❌ Video validation failed", 0)
raise e
# Choose detection method based on mode
if detection_mode == "loud":
if progress_callback:
progress_callback("🔍 Analyzing audio for loud moments...", 10)
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
elif detection_mode == "scene":
if progress_callback:
progress_callback("🎬 Analyzing scene changes...", 10)
best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
elif detection_mode == "motion":
if progress_callback:
progress_callback("🏃 Analyzing motion intensity...", 10)
best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
elif detection_mode == "speech":
if progress_callback:
progress_callback("😄 Analyzing speech emotions...", 10)
best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
elif detection_mode == "peaks":
if progress_callback:
progress_callback("🎵 Analyzing audio peaks...", 10)
best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
elif detection_mode == "combined":
if progress_callback:
progress_callback("🎯 Running comprehensive analysis...", 10)
best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
else:
best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
selected = best_moments[:max_clips]
if not selected:
mode_name = {
"loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
"speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
}.get(detection_mode, "moments")
raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")
if progress_callback:
progress_callback(f"📊 Found {len(selected)} clips to generate", 20)
@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
class ShortsGeneratorGUI:
def __init__(self, root):
self.root = root
self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
self.root.geometry("650x650") # Reduced height to eliminate empty space
self.root.minsize(600, 600) # Reduced minimum size
self.video_path = None
self.output_folder = "shorts"
@ -265,43 +641,202 @@ class ShortsGeneratorGUI:
self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
clips_checkbox.config(command=toggle_clips_limit)
# Add tooltip for max clips setting
clips_tooltip_text = """Max Clips Control:
Checked: Limit the number of clips generated
Unchecked: Generate all detected moments
1-3 clips: Quick highlights for social media
4-6 clips: Good variety pack
7-10 clips: Comprehensive highlight reel
Tip: Start with 3 clips, then increase if you want more content"""
ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
ToolTip(clips_checkbox, clips_tooltip_text, side='right')
# Detection Mode Selection
detection_frame = tk.Frame(settings_frame)
detection_frame.pack(fill="x", pady=5)
tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")
self.detection_mode_var = tk.StringVar(value="loud")
self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
"😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
state="readonly", width=22)
detection_dropdown.pack(side="right")
# Store the mapping between display text and internal values
self.mode_mapping = {
"🔊 Loud Moments": "loud",
"🎬 Scene Changes": "scene",
"🏃 Motion Intensity": "motion",
"😄 Emotional Speech": "speech",
"🎵 Audio Peaks": "peaks",
"🎯 Smart Combined": "combined"
}
# Simple, clear descriptions for mode tooltips
mode_descriptions = {
"🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
Best for: Gaming reactions, music highlights, shouting moments
Finds: High-volume audio segments above the threshold
Ideal when: Your video has clear volume differences
Tip: Adjust threshold if too many/few moments found""",
"🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
Best for: Movie trailers, montages, location changes
Finds: Major visual shifts between frames
Ideal when: Video has multiple scenes or camera angles
Tip: Great for content with quick cuts or transitions""",
"🏃 Motion Intensity": """Analyzes movement and action within video frames.
Best for: Sports highlights, dance videos, action scenes
Finds: High-movement moments with lots of visual activity
Ideal when: Video contains physical action or movement
Tip: Perfect for extracting the most dynamic moments""",
"😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
Best for: Reactions, reviews, commentary, tutorials
Finds: Words like 'wow', 'amazing', exclamations, excited tone
Ideal when: Video has spoken content with emotional moments
Tip: Captures the most engaging verbal reactions""",
"🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
Best for: Music videos, sound effect moments, beat drops
Finds: Sharp increases in audio frequency or volume
Ideal when: Video has musical elements or sound effects
Tip: Great for rhythm-based or audio-driven content""",
"🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
Best for: Any video type, general content, unsure what to use
Finds: Moments scoring high across multiple analysis methods
Ideal when: You want the most 'interesting' overall moments
Tip: Recommended starting point for most videos"""
}
# Create tooltip for the dropdown (updates when selection changes)
current_tooltip_text = mode_descriptions["🔊 Loud Moments"] # Default
dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)
# Update tooltip when selection changes
def on_detection_change(event):
selection = detection_dropdown.get()
mode_map = {
"🔊 Loud Moments": "loud",
"🎬 Scene Changes": "scene",
"🏃 Motion Intensity": "motion",
"😄 Emotional Speech": "speech",
"🎵 Audio Peaks": "peaks",
"🎯 Smart Combined": "combined"
}
self.detection_mode_var.set(mode_map.get(selection, "loud"))
# Update tooltip text for the selected mode
dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
# Show/hide threshold setting based on mode
if selection == "🔊 Loud Moments":
threshold_frame.pack(fill="x", pady=5)
else:
threshold_frame.pack_forget()
detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
# Audio threshold (only shown for loud moments)
threshold_frame = tk.Frame(settings_frame)
threshold_frame.pack(fill="x", pady=5)
threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
threshold_label.pack(side="left")
self.threshold_var = tk.IntVar(value=-30)
threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
threshold_spinbox.pack(side="right")
# Add tooltip for threshold setting
threshold_tooltip_text = """Audio Threshold Control:
Higher values (closer to 0): Only very loud moments
Lower values (closer to -50): More moments detected
Default -30 dB: Good balance for most videos
Adjust based on your video's audio levels
Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
# Clip duration (increased to 120 seconds max)
duration_frame = tk.Frame(settings_frame)
duration_frame.pack(fill="x", pady=5)
duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
duration_label.pack(side="left")
self.duration_var = tk.IntVar(value=5)
duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
duration_spinbox.pack(side="right")
# Add tooltip for duration setting
duration_tooltip_text = """Clip Duration Setting:
3-10 seconds: Perfect for TikTok/Instagram Reels
10-30 seconds: Good for YouTube Shorts
30-60 seconds: Longer form highlights
60+ seconds: Extended content clips
Shorter clips = more viral potential
Longer clips = more context and story"""
ToolTip(duration_spinbox, duration_tooltip_text, side='right')
# Preview button
self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
command=self.preview_clips, bg="#2196F3", fg="white",
font=("Arial", 10, "bold"), pady=5)
self.preview_btn.pack(pady=5)
# Add tooltip for preview button
preview_tooltip_text = """Preview Clips Feature:
Analyzes your video using the selected detection mode
Shows all detected moments with timestamps
Lets you select specific clips to generate
No video files created - just analysis
Great for testing settings before full generation
Tip: Always preview first to see what the AI finds!"""
ToolTip(self.preview_btn, preview_tooltip_text, side='right')
# Generate button
self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
command=self.start_generation, bg="#4CAF50", fg="white",
font=("Arial", 12, "bold"), pady=10)
self.generate_btn.pack(pady=10)
# Add tooltip for generate button
generate_tooltip_text = """Generate Shorts Feature:
Creates actual video files from detected moments
Adds AI-generated subtitles to each clip
Formats videos for vertical social media (1080x1920)
Saves clips to your selected output folder
Takes longer but creates ready-to-post content
Tip: Use Preview first to fine-tune your settings!"""
ToolTip(self.generate_btn, generate_tooltip_text, side='right')
# Progress frame
progress_frame = tk.Frame(self.root)
progress_frame.pack(pady=5, padx=20, fill="x")
self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
self.progress_label.pack()
self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
self.progress_bar.pack(pady=3)
def select_video(self):
file_path = filedialog.askopenfilename(
@ -327,18 +862,44 @@ class ShortsGeneratorGUI:
# Validate video first
validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
# Analyze using selected detection mode
self.preview_btn.config(state="disabled", text="Analyzing...")
self.root.update()
detection_mode = self.detection_mode_var.get()
if detection_mode == "loud":
moments = detect_loud_moments(
self.video_path,
chunk_duration=self.duration_var.get(),
threshold_db=self.threshold_var.get()
)
mode_name = "loud moments"
elif detection_mode == "scene":
moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "scene changes"
elif detection_mode == "motion":
moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "motion moments"
elif detection_mode == "speech":
moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "emotional speech"
elif detection_mode == "peaks":
moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "audio peaks"
elif detection_mode == "combined":
moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
mode_name = "interesting moments"
else:
moments = detect_loud_moments(
self.video_path,
chunk_duration=self.duration_var.get(),
threshold_db=self.threshold_var.get()
)
mode_name = "loud moments"
if not moments:
messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
return
# Show preview window
@@ -346,7 +907,7 @@
preview_window.title("Preview and Select Clips") preview_window.title("Preview and Select Clips")
preview_window.geometry("500x400") preview_window.geometry("500x400")
tk.Label(preview_window, text=f"Found {len(loud_moments)} loud moments:", font=("Arial", 12, "bold")).pack(pady=10) tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)
# Create scrollable frame for checkboxes # Create scrollable frame for checkboxes
canvas = tk.Canvas(preview_window) canvas = tk.Canvas(preview_window)
@@ -364,7 +925,7 @@
# Store checkbox variables and clip data
self.clip_vars = []
# Use all clips if max clips is disabled, otherwise limit by setting
clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
self.preview_clips_data = clips_to_show
# Add selectable clips with checkboxes
@@ -510,11 +1071,12 @@
generate_shorts(
self.video_path,
max_clips=self.clips_var.get() if self.use_max_clips.get() else 10, # Default max for non-loud modes
output_folder=self.output_folder,
progress_callback=self.update_progress,
threshold_db=self.threshold_var.get(),
clip_duration=self.duration_var.get(),
detection_mode=self.detection_mode_var.get()
)
messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
except FileNotFoundError as e:
@@ -547,18 +1109,14 @@ def run_gui():
if __name__ == "__main__":
import sys
if len(sys.argv) > 1 and sys.argv[1] != "--gui":
# Run command line mode
try:
generate_shorts(sys.argv[1])
print("✅ Shorts generation completed successfully!")
except Exception as e:
print(f"❌ Error: {str(e)}")
else:
# Run GUI mode (default)
run_gui()

View File

@@ -1,4 +1,8 @@
1
00:00:00,000 --> 00:00:01,280
Yeah! Yeah!
2
00:00:06,000 --> 00:00:07,809
FLAVOR CHEESE 24!