Update requirements and enhance shorts generator with advanced detection modes and tooltips
parent 491040b148
commit bd55be0448
@@ -1,3 +1,6 @@
 moviepy>=1.0.3
 faster-whisper>=0.10.0
 numpy>=1.21.0
+opencv-python>=4.5.0
+scipy>=1.7.0
+librosa>=0.9.0
@@ -5,6 +5,150 @@ from faster_whisper import WhisperModel
 import tkinter as tk
 from tkinter import filedialog, messagebox, ttk
 import threading
+import cv2
+from scipy import signal
+import librosa
 
+class ToolTip:
+    """Create a tooltip for a given widget"""
+    def __init__(self, widget, text='widget info', side='right'):
+        self.widget = widget
+        self.text = text
+        self.side = side
+        self.widget.bind("<Enter>", self.enter)
+        self.widget.bind("<Leave>", self.leave)
+        self.tipwindow = None
+
+    def enter(self, event=None):
+        self.showtip()
+
+    def leave(self, event=None):
+        self.hidetip()
+
+    def showtip(self):
+        if self.tipwindow or not self.text:
+            return
+
+        # Get widget position
+        x = self.widget.winfo_rootx()
+        y = self.widget.winfo_rooty()
+        w = self.widget.winfo_width()
+        h = self.widget.winfo_height()
+
+        # Position tooltip based on side preference
+        if self.side == 'right':
+            x = x + w + 10  # 10px to the right of widget
+            y = y
+        else:
+            x = x + 25
+            y = y + h + 5
+
+        self.tipwindow = tw = tk.Toplevel(self.widget)
+        tw.wm_overrideredirect(True)
+        tw.wm_geometry("+%d+%d" % (x, y))
+        label = tk.Label(tw, text=self.text, justify=tk.LEFT,
+                         background="#ffffe0", relief=tk.SOLID, borderwidth=1,
+                         font=("Arial", "9", "normal"), wraplength=350)
+        label.pack(ipadx=5, ipady=3)
+
+    def hidetip(self):
+        tw = self.tipwindow
+        self.tipwindow = None
+        if tw:
+            tw.destroy()
+
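
[Editor note, not part of the commit] A minimal usage sketch for the ToolTip class above, assuming the class is in scope; the widget and tooltip text are illustrative.

    import tkinter as tk

    root = tk.Tk()
    button = tk.Button(root, text="Generate")
    button.pack(padx=20, pady=20)
    # Tooltip pops up 10px to the right of the button on hover
    ToolTip(button, "Creates video files from detected moments", side='right')
    root.mainloop()
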
+class ComboboxTooltip:
+    """Special tooltip for combobox that shows on listbox hover"""
+    def __init__(self, combobox, descriptions):
+        self.combobox = combobox
+        self.descriptions = descriptions
+        self.tipwindow = None
+        self.bound_listbox = None
+
+        # Bind to combobox events
+        self.combobox.bind("<Button-1>", self.on_click)
+        self.combobox.bind("<KeyPress>", self.on_keypress)
+
+    def on_click(self, event):
+        # Try to find the listbox when dropdown opens
+        self.combobox.after(50, self.bind_listbox)
+
+    def on_keypress(self, event):
+        # Handle keyboard navigation
+        self.combobox.after(50, self.bind_listbox)
+
+    def bind_listbox(self):
+        # Find the listbox widget more reliably
+        try:
+            # Look through all toplevel windows for the combobox popdown
+            for window in self.combobox.winfo_toplevel().winfo_children():
+                window_class = window.winfo_class()
+                if window_class == 'Toplevel':
+                    # Found a toplevel, look for listbox inside
+                    for child in window.winfo_children():
+                        if child.winfo_class() == 'Listbox':
+                            if self.bound_listbox != child:
+                                self.bound_listbox = child
+                                child.bind("<Motion>", self.on_listbox_motion)
+                                child.bind("<Leave>", self.on_listbox_leave)
+                                child.bind("<ButtonRelease-1>", self.on_listbox_leave)
+                            return
+        except Exception as e:
+            # Fallback method - try to find any listbox
+            try:
+                # Alternative approach: look for the popdown frame
+                for child in self.combobox.tk.call('winfo', 'children', '.'):
+                    if 'popdown' in str(child):
+                        popdown = self.combobox.nametowidget(child)
+                        for subchild in popdown.winfo_children():
+                            if subchild.winfo_class() == 'Listbox':
+                                if self.bound_listbox != subchild:
+                                    self.bound_listbox = subchild
+                                    subchild.bind("<Motion>", self.on_listbox_motion)
+                                    subchild.bind("<Leave>", self.on_listbox_leave)
+                                    subchild.bind("<ButtonRelease-1>", self.on_listbox_leave)
+                                return
+            except:
+                pass
+
+    def on_listbox_motion(self, event):
+        try:
+            listbox = event.widget
+            index = listbox.nearest(event.y)
+            if 0 <= index < len(self.combobox['values']):
+                selection = self.combobox['values'][index]
+                if selection in self.descriptions:
+                    self.show_tooltip(event, self.descriptions[selection])
+        except Exception:
+            pass
+
+    def on_listbox_leave(self, event):
+        self.hide_tooltip()
+
+    def show_tooltip(self, event, text):
+        self.hide_tooltip()  # Hide any existing tooltip
+
+        try:
+            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
+            y = event.widget.winfo_rooty() + event.y - 20
+
+            self.tipwindow = tw = tk.Toplevel(event.widget)
+            tw.wm_overrideredirect(True)
+            tw.wm_geometry("+%d+%d" % (x, y))
+            label = tk.Label(tw, text=text, justify=tk.LEFT,
+                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
+                             font=("Arial", "9", "normal"), wraplength=350)
+            label.pack(ipadx=5, ipady=3)
+        except Exception:
+            pass
+
+    def hide_tooltip(self):
+        if self.tipwindow:
+            try:
+                self.tipwindow.destroy()
+            except:
+                pass
+        self.tipwindow = None
+
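
[Editor note, not part of the commit] bind_listbox walks the widget tree to find the dropdown's Listbox, with a second tree-walk as fallback. A shorter route relies on a Tk internal: the ttk::combobox Tcl helper exposes the popdown window, whose listbox lives at ".f.l". This is undocumented and could change between Tk versions, so treat the sketch below as an assumption.

    def find_popdown_listbox(combobox):
        # Ask Tk for the combobox's popdown window path (internal API)
        popdown = combobox.tk.eval(f'ttk::combobox::PopdownWindow {combobox}')
        return combobox.nametowidget(f'{popdown}.f.l')
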
 def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
     print("🔍 Analyzing audio...")
@@ -28,6 +172,208 @@ def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10)
     clip.close()
     return loud_chunks
 
+def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
+    """Detect dramatic visual scene changes"""
+    print("🎬 Analyzing scene changes...")
+    clip = VideoFileClip(video_path)
+
+    # Sample frames at regular intervals
+    sample_rate = 2  # Check every 2 seconds
+    times = np.arange(0, clip.duration, sample_rate)
+
+    scene_changes = []
+    prev_frame = None
+
+    for i, t in enumerate(times[:-1]):
+        try:
+            # Get current and next frame
+            frame1 = clip.get_frame(t)
+            frame2 = clip.get_frame(times[i + 1])
+
+            # Convert to grayscale and resize for faster processing
+            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
+            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
+            gray1 = cv2.resize(gray1, (160, 90))  # Small size for speed
+            gray2 = cv2.resize(gray2, (160, 90))
+
+            # Calculate structural similarity difference
+            diff = np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0
+
+            if diff > threshold:
+                start = max(0, t - chunk_duration/2)
+                end = min(clip.duration, t + chunk_duration/2)
+                scene_changes.append((start, end))
+
+        except Exception as e:
+            print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
+            continue
+
+    print(f"🎬 Found {len(scene_changes)} scene changes")
+    clip.close()
+    return scene_changes
+
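
[Editor note, not part of the commit] Despite the "structural similarity" comment, the metric above is a mean absolute pixel difference normalized to [0, 1], not SSIM. Isolated, it is just:

    import numpy as np

    def frame_difference(gray1: np.ndarray, gray2: np.ndarray) -> float:
        """Mean absolute difference between two grayscale frames, in [0, 1]."""
        return float(np.mean(np.abs(gray1.astype(float) - gray2.astype(float))) / 255.0)

Identical frames score 0.0 and an all-black vs. all-white pair scores 1.0, so the 0.3 default threshold flags fairly large visual shifts.
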
+def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
+    """Detect high motion/action moments"""
+    print("🏃 Analyzing motion intensity...")
+    clip = VideoFileClip(video_path)
+
+    sample_rate = 1  # Check every second
+    times = np.arange(0, clip.duration - 1, sample_rate)
+
+    motion_moments = []
+
+    for t in times:
+        try:
+            # Get two consecutive frames
+            frame1 = clip.get_frame(t)
+            frame2 = clip.get_frame(t + 0.5)  # Half second later
+
+            # Convert to grayscale and resize
+            gray1 = cv2.cvtColor(frame1, cv2.COLOR_RGB2GRAY)
+            gray2 = cv2.cvtColor(frame2, cv2.COLOR_RGB2GRAY)
+            gray1 = cv2.resize(gray1, (160, 90))
+            gray2 = cv2.resize(gray2, (160, 90))
+
+            # Calculate optical flow magnitude
+            flow = cv2.calcOpticalFlowPyrLK(gray1, gray2,
+                                            np.random.randint(0, 160, (100, 1, 2)).astype(np.float32),
+                                            None)[0]
+
+            if flow is not None:
+                motion_magnitude = np.mean(np.linalg.norm(flow.reshape(-1, 2), axis=1))
+
+                if motion_magnitude > threshold:
+                    start = max(0, t - chunk_duration/2)
+                    end = min(clip.duration, t + chunk_duration/2)
+                    motion_moments.append((start, end))
+
+        except Exception as e:
+            print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
+            continue
+
+    print(f"🏃 Found {len(motion_moments)} high-motion moments")
+    clip.close()
+    return motion_moments
+
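
[Editor note, not part of the commit] Two caveats on the block above: cv2.calcOpticalFlowPyrLK returns the new positions of the tracked points (plus status and error arrays), so taking the norm of the returned array measures where points sit, not how far they moved; and np.random.randint(0, 160, ...) draws y-coordinates up to 160 on frames resized to 160x90, so some seed points fall outside the image. A displacement-based sketch using corner features instead of random points:

    import cv2
    import numpy as np

    def motion_magnitude(gray1: np.ndarray, gray2: np.ndarray) -> float:
        # Seed the tracker with corners that are actually inside the frame
        pts = cv2.goodFeaturesToTrack(gray1, maxCorners=100, qualityLevel=0.01, minDistance=5)
        if pts is None:
            return 0.0
        new_pts, status, _ = cv2.calcOpticalFlowPyrLK(gray1, gray2, pts, None)
        ok = status.flatten() == 1
        if not ok.any():
            return 0.0
        # Mean displacement of successfully tracked points, in pixels
        return float(np.mean(np.linalg.norm(
            new_pts[ok].reshape(-1, 2) - pts[ok].reshape(-1, 2), axis=1)))

The 0.15 default threshold would likely need retuning against true pixel displacements.
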
+def detect_speech_emotion(video_path, chunk_duration=5):
+    """Detect emotional/excited speech patterns"""
+    print("😄 Analyzing speech emotions...")
+
+    # Use Whisper to get detailed speech analysis
+    model = WhisperModel("base", device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)
+
+    emotional_moments = []
+    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
+                           'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']
+
+    for segment in segments:
+        text = segment.text.lower()
+
+        # Check for excitement keywords
+        has_keywords = any(keyword in text for keyword in excitement_keywords)
+
+        # Check for multiple exclamation-worthy patterns
+        has_caps = any(word.isupper() for word in segment.text.split())
+        has_punctuation = '!' in segment.text or '?' in segment.text
+        is_short_excited = len(text.split()) <= 5 and (has_keywords or has_caps)
+
+        if has_keywords or has_punctuation or is_short_excited:
+            start = max(0, segment.start - chunk_duration/2)
+            end = min(segment.end + chunk_duration/2, segment.end + chunk_duration)
+            emotional_moments.append((start, end))
+
+    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
+    return emotional_moments
+
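
[Editor note, not part of the commit] In the window computation above, min(segment.end + chunk_duration/2, segment.end + chunk_duration) always evaluates to the first operand, so the min() is a no-op. If the intent was to clamp to the video length, that would need the clip duration, e.g.:

    # Hypothetical fix, assuming the video length is available as `duration`
    start = max(0, segment.start - chunk_duration / 2)
    end = min(duration, segment.end + chunk_duration / 2)
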
+def detect_audio_peaks(video_path, chunk_duration=5):
+    """Detect sudden audio peaks (bass drops, beats, impacts)"""
+    print("🎵 Analyzing audio peaks...")
+
+    clip = VideoFileClip(video_path)
+    audio = clip.audio.to_soundarray(fps=22050)  # Lower sample rate for speed
+
+    # Convert to mono if stereo
+    if len(audio.shape) > 1:
+        audio = np.mean(audio, axis=1)
+
+    # Find spectral peaks (bass, treble spikes)
+    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))
+
+    peak_moments = []
+    prev_peak = 0
+
+    for peak in peaks:
+        peak_time = peak / 22050
+
+        # Avoid peaks that are too close together
+        if peak_time - prev_peak > chunk_duration:
+            start = max(0, peak_time - chunk_duration/2)
+            end = min(clip.duration, peak_time + chunk_duration/2)
+            peak_moments.append((start, end))
+            prev_peak = peak_time
+
+    print(f"🎵 Found {len(peak_moments)} audio peak moments")
+    clip.close()
+    return peak_moments
+
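
[Editor note, not part of the commit] Despite the "spectral peaks" comment, the detection above operates on the raw waveform: scipy.signal.find_peaks over the absolute samples, keeping only peaks above the 95th amplitude percentile. A self-contained sketch of that step on synthetic audio:

    import numpy as np
    from scipy import signal

    sr = 22050
    audio = np.random.randn(sr * 10) * 0.1   # 10 s of quiet noise
    audio[sr * 4] = 3.0                      # one loud impulse at t = 4 s

    peaks, _ = signal.find_peaks(np.abs(audio), height=np.percentile(np.abs(audio), 95))
    peak_times = peaks / sr                  # sample indices -> seconds
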
+def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
+    """Combine multiple detection methods for best moments"""
+    print("🎯 Running comprehensive moment analysis...")
+
+    if weights is None:
+        weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}
+
+    # Get all detection results
+    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
+    scene_moments = detect_scene_changes(video_path, chunk_duration)
+    motion_moments = detect_motion_intensity(video_path, chunk_duration)
+    speech_moments = detect_speech_emotion(video_path, chunk_duration)
+    peak_moments = detect_audio_peaks(video_path, chunk_duration)
+
+    # Create time-based scoring
+    clip = VideoFileClip(video_path)
+    duration = clip.duration
+    clip.close()
+
+    # Score each second of the video
+    time_scores = {}
+
+    for moments, weight in [(loud_moments, weights['loud']),
+                            (scene_moments, weights['scene']),
+                            (motion_moments, weights['motion']),
+                            (speech_moments, weights['speech']),
+                            (peak_moments, weights['peaks'])]:
+        for start, end in moments:
+            for t in range(int(start), int(end) + 1):
+                if t not in time_scores:
+                    time_scores[t] = 0
+                time_scores[t] += weight
+
+    # Find the highest scoring segments
+    if not time_scores:
+        return loud_moments  # Fallback to loud moments
+
+    # Get top scoring time periods
+    sorted_times = sorted(time_scores.items(), key=lambda x: x[1], reverse=True)
+
+    combined_moments = []
+    used_times = set()
+
+    for time_sec, score in sorted_times:
+        if time_sec not in used_times and score > 0.3:  # Minimum threshold
+            start = max(0, time_sec - chunk_duration/2)
+            end = min(duration, time_sec + chunk_duration/2)
+            combined_moments.append((start, end))
+
+            # Mark nearby times as used to avoid overlap
+            for t in range(max(0, time_sec - chunk_duration),
+                           min(int(duration), time_sec + chunk_duration)):
+                used_times.add(t)
+
+    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
+    return combined_moments
+
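
[Editor note, not part of the commit] A worked example of the per-second weighted voting above, with two detectors and their default weights; overlapping seconds accumulate weight from both:

    weights = {'loud': 0.3, 'scene': 0.2}
    moments = {'loud': [(10, 15)], 'scene': [(13, 18)]}

    time_scores = {}
    for name, spans in moments.items():
        for start, end in spans:
            for t in range(int(start), int(end) + 1):
                time_scores[t] = time_scores.get(t, 0) + weights[name]

    # Seconds 13-15 score 0.5; the rest score 0.3 or 0.2. Only seconds
    # scoring strictly above the 0.3 minimum survive the selection loop.
    print(sorted(time_scores.items(), key=lambda kv: kv[1], reverse=True)[:3])
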
 def transcribe_and_extract_subtitles(video_path, start, end):
     print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
     model = WhisperModel("base", device="cpu", compute_type="int8")
@@ -150,7 +496,8 @@ def validate_video(video_path, min_duration=30):
         else:
             raise ValueError(f"Error reading video: {str(e)}")
 
-def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None, threshold_db=-30, clip_duration=5):
+def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
+                    threshold_db=-30, clip_duration=5, detection_mode="loud"):
     os.makedirs(output_folder, exist_ok=True)
 
     # Validate video first
@@ -163,14 +510,42 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
             progress_callback(f"❌ Video validation failed", 0)
         raise e
 
-    if progress_callback:
-        progress_callback("🔍 Analyzing audio for loud moments...", 10)
+    # Choose detection method based on mode
+    if detection_mode == "loud":
+        if progress_callback:
+            progress_callback("🔍 Analyzing audio for loud moments...", 10)
+        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
+    elif detection_mode == "scene":
+        if progress_callback:
+            progress_callback("🎬 Analyzing scene changes...", 10)
+        best_moments = detect_scene_changes(video_path, chunk_duration=clip_duration)
+    elif detection_mode == "motion":
+        if progress_callback:
+            progress_callback("🏃 Analyzing motion intensity...", 10)
+        best_moments = detect_motion_intensity(video_path, chunk_duration=clip_duration)
+    elif detection_mode == "speech":
+        if progress_callback:
+            progress_callback("😄 Analyzing speech emotions...", 10)
+        best_moments = detect_speech_emotion(video_path, chunk_duration=clip_duration)
+    elif detection_mode == "peaks":
+        if progress_callback:
+            progress_callback("🎵 Analyzing audio peaks...", 10)
+        best_moments = detect_audio_peaks(video_path, chunk_duration=clip_duration)
+    elif detection_mode == "combined":
+        if progress_callback:
+            progress_callback("🎯 Running comprehensive analysis...", 10)
+        best_moments = detect_combined_intensity(video_path, chunk_duration=clip_duration)
+    else:
+        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
 
-    best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
     selected = best_moments[:max_clips]
 
     if not selected:
-        raise ValueError(f"No loud moments found with threshold {threshold_db} dB. Try lowering the threshold or use a different video.")
+        mode_name = {
+            "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
+            "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
+        }.get(detection_mode, "moments")
+        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")
 
     if progress_callback:
         progress_callback(f"📊 Found {len(selected)} clips to generate", 20)
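
[Editor note, not part of the commit] The if/elif chain above (and its twin in preview_clips further down) could be collapsed into a table of detector callables; a hedged refactor sketch, assuming the detector functions from this commit are in scope:

    DETECTORS = {
        "scene": detect_scene_changes,
        "motion": detect_motion_intensity,
        "speech": detect_speech_emotion,
        "peaks": detect_audio_peaks,
        "combined": detect_combined_intensity,
    }

    if detection_mode in DETECTORS:
        best_moments = DETECTORS[detection_mode](video_path, chunk_duration=clip_duration)
    else:
        # "loud" (and any unknown mode) keeps the threshold parameter
        best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration,
                                           threshold_db=threshold_db)
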
@@ -194,8 +569,9 @@ def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_ca
 class ShortsGeneratorGUI:
     def __init__(self, root):
         self.root = root
-        self.root.title("AI Shorts Generator")
-        self.root.geometry("500x400")
+        self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
+        self.root.geometry("650x650")  # Reduced height to eliminate empty space
+        self.root.minsize(600, 600)  # Reduced minimum size
 
         self.video_path = None
         self.output_folder = "shorts"
@@ -265,43 +641,202 @@ class ShortsGeneratorGUI:
         self.use_max_clips.trace("w", lambda *args: toggle_clips_limit())
         clips_checkbox.config(command=toggle_clips_limit)
 
-        # Audio threshold
+        # Add tooltip for max clips setting
+        clips_tooltip_text = """Max Clips Control:
+
+• Checked: Limit the number of clips generated
+• Unchecked: Generate all detected moments
+• 1-3 clips: Quick highlights for social media
+• 4-6 clips: Good variety pack
+• 7-10 clips: Comprehensive highlight reel
+
+Tip: Start with 3 clips, then increase if you want more content"""
+        ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
+        ToolTip(clips_checkbox, clips_tooltip_text, side='right')
+
+        # Detection Mode Selection
+        detection_frame = tk.Frame(settings_frame)
+        detection_frame.pack(fill="x", pady=5)
+        tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")
+
+        self.detection_mode_var = tk.StringVar(value="loud")
+        self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")
+
+        detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
+                                          values=["🔊 Loud Moments", "🎬 Scene Changes", "🏃 Motion Intensity",
+                                                  "😄 Emotional Speech", "🎵 Audio Peaks", "🎯 Smart Combined"],
+                                          state="readonly", width=22)
+        detection_dropdown.pack(side="right")
+
+        # Store the mapping between display text and internal values
+        self.mode_mapping = {
+            "🔊 Loud Moments": "loud",
+            "🎬 Scene Changes": "scene",
+            "🏃 Motion Intensity": "motion",
+            "😄 Emotional Speech": "speech",
+            "🎵 Audio Peaks": "peaks",
+            "🎯 Smart Combined": "combined"
+        }
+
+        # Simple, clear descriptions for mode tooltips
+        mode_descriptions = {
+            "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
+
+• Best for: Gaming reactions, music highlights, shouting moments
+• Finds: High-volume audio segments above the threshold
+• Ideal when: Your video has clear volume differences
+• Tip: Adjust threshold if too many/few moments found""",
+
+            "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
+
+• Best for: Movie trailers, montages, location changes
+• Finds: Major visual shifts between frames
+• Ideal when: Video has multiple scenes or camera angles
+• Tip: Great for content with quick cuts or transitions""",
+
+            "🏃 Motion Intensity": """Analyzes movement and action within video frames.
+
+• Best for: Sports highlights, dance videos, action scenes
+• Finds: High-movement moments with lots of visual activity
+• Ideal when: Video contains physical action or movement
+• Tip: Perfect for extracting the most dynamic moments""",
+
+            "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
+
+• Best for: Reactions, reviews, commentary, tutorials
+• Finds: Words like 'wow', 'amazing', exclamations, excited tone
+• Ideal when: Video has spoken content with emotional moments
+• Tip: Captures the most engaging verbal reactions""",
+
+            "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
+
+• Best for: Music videos, sound effect moments, beat drops
+• Finds: Sharp increases in audio frequency or volume
+• Ideal when: Video has musical elements or sound effects
+• Tip: Great for rhythm-based or audio-driven content""",
+
+            "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
+
+• Best for: Any video type, general content, unsure what to use
+• Finds: Moments scoring high across multiple analysis methods
+• Ideal when: You want the most 'interesting' overall moments
+• Tip: Recommended starting point for most videos"""
+        }
+
+        # Create tooltip for the dropdown (updates when selection changes)
+        current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
+        dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)
+
+        # Update tooltip when selection changes
+        def on_detection_change(event):
+            selection = detection_dropdown.get()
+            mode_map = {
+                "🔊 Loud Moments": "loud",
+                "🎬 Scene Changes": "scene",
+                "🏃 Motion Intensity": "motion",
+                "😄 Emotional Speech": "speech",
+                "🎵 Audio Peaks": "peaks",
+                "🎯 Smart Combined": "combined"
+            }
+            self.detection_mode_var.set(mode_map.get(selection, "loud"))
+
+            # Update tooltip text for the selected mode
+            dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
+
+            # Show/hide threshold setting based on mode
+            if selection == "🔊 Loud Moments":
+                threshold_frame.pack(fill="x", pady=5)
+            else:
+                threshold_frame.pack_forget()
+
+        detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)
+
+        # Audio threshold (only shown for loud moments)
         threshold_frame = tk.Frame(settings_frame)
         threshold_frame.pack(fill="x", pady=5)
-        tk.Label(threshold_frame, text="Audio Threshold (dB):").pack(side="left")
+        threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
+        threshold_label.pack(side="left")
         self.threshold_var = tk.IntVar(value=-30)
         threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
         threshold_spinbox.pack(side="right")
 
+        # Add tooltip for threshold setting
+        threshold_tooltip_text = """Audio Threshold Control:
+
+• Higher values (closer to 0): Only very loud moments
+• Lower values (closer to -50): More moments detected
+• Default -30 dB: Good balance for most videos
+• Adjust based on your video's audio levels
+
+Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
+        ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')
+
         # Clip duration (increased to 120 seconds max)
         duration_frame = tk.Frame(settings_frame)
         duration_frame.pack(fill="x", pady=5)
-        tk.Label(duration_frame, text="Clip Duration (seconds):").pack(side="left")
+        duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
+        duration_label.pack(side="left")
         self.duration_var = tk.IntVar(value=5)
         duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
         duration_spinbox.pack(side="right")
 
+        # Add tooltip for duration setting
+        duration_tooltip_text = """Clip Duration Setting:
+
+• 3-10 seconds: Perfect for TikTok/Instagram Reels
+• 10-30 seconds: Good for YouTube Shorts
+• 30-60 seconds: Longer form highlights
+• 60+ seconds: Extended content clips
+
+Shorter clips = more viral potential
+Longer clips = more context and story"""
+        ToolTip(duration_spinbox, duration_tooltip_text, side='right')
+
         # Preview button
         self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                      command=self.preview_clips, bg="#2196F3", fg="white",
                                      font=("Arial", 10, "bold"), pady=5)
-        self.preview_btn.pack(pady=10)
+        self.preview_btn.pack(pady=5)
+
+        # Add tooltip for preview button
+        preview_tooltip_text = """Preview Clips Feature:
+
+• Analyzes your video using the selected detection mode
+• Shows all detected moments with timestamps
+• Lets you select specific clips to generate
+• No video files created - just analysis
+• Great for testing settings before full generation
+
+Tip: Always preview first to see what the AI finds!"""
+        ToolTip(self.preview_btn, preview_tooltip_text, side='right')
 
         # Generate button
         self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                       command=self.start_generation, bg="#4CAF50", fg="white",
                                       font=("Arial", 12, "bold"), pady=10)
-        self.generate_btn.pack(pady=20)
+        self.generate_btn.pack(pady=10)
+
+        # Add tooltip for generate button
+        generate_tooltip_text = """Generate Shorts Feature:
+
+• Creates actual video files from detected moments
+• Adds AI-generated subtitles to each clip
+• Formats videos for vertical social media (1080x1920)
+• Saves clips to your selected output folder
+• Takes longer but creates ready-to-post content
+
+Tip: Use Preview first to fine-tune your settings!"""
+        ToolTip(self.generate_btn, generate_tooltip_text, side='right')
 
         # Progress frame
         progress_frame = tk.Frame(self.root)
-        progress_frame.pack(pady=10, padx=20, fill="x")
+        progress_frame.pack(pady=5, padx=20, fill="x")
 
         self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
         self.progress_label.pack()
 
         self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
-        self.progress_bar.pack(pady=5)
+        self.progress_bar.pack(pady=3)
 
     def select_video(self):
         file_path = filedialog.askopenfilename(
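
[Editor note, not part of the commit] In the hunks shown, the ComboboxTooltip class added earlier is never instantiated; the dropdown gets a plain ToolTip whose text is swapped in on_detection_change. Wiring up the per-entry hover tooltips would presumably be a one-liner after the dropdown is created:

    # Hypothetical wiring, not present in this commit
    ComboboxTooltip(detection_dropdown, mode_descriptions)

Also worth noting: on_detection_change rebuilds a local mode_map identical to self.mode_mapping; reusing the attribute would drop the duplicate dict.
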
@@ -327,18 +862,44 @@ class ShortsGeneratorGUI:
             # Validate video first
             validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
 
-            # Analyze for loud moments
+            # Analyze using selected detection mode
             self.preview_btn.config(state="disabled", text="Analyzing...")
             self.root.update()
 
-            loud_moments = detect_loud_moments(
-                self.video_path,
-                chunk_duration=self.duration_var.get(),
-                threshold_db=self.threshold_var.get()
-            )
+            detection_mode = self.detection_mode_var.get()
 
-            if not loud_moments:
-                messagebox.showinfo("Preview", f"No loud moments found with threshold {self.threshold_var.get()} dB.\nTry lowering the threshold.")
+            if detection_mode == "loud":
+                moments = detect_loud_moments(
+                    self.video_path,
+                    chunk_duration=self.duration_var.get(),
+                    threshold_db=self.threshold_var.get()
+                )
+                mode_name = "loud moments"
+            elif detection_mode == "scene":
+                moments = detect_scene_changes(self.video_path, chunk_duration=self.duration_var.get())
+                mode_name = "scene changes"
+            elif detection_mode == "motion":
+                moments = detect_motion_intensity(self.video_path, chunk_duration=self.duration_var.get())
+                mode_name = "motion moments"
+            elif detection_mode == "speech":
+                moments = detect_speech_emotion(self.video_path, chunk_duration=self.duration_var.get())
+                mode_name = "emotional speech"
+            elif detection_mode == "peaks":
+                moments = detect_audio_peaks(self.video_path, chunk_duration=self.duration_var.get())
+                mode_name = "audio peaks"
+            elif detection_mode == "combined":
+                moments = detect_combined_intensity(self.video_path, chunk_duration=self.duration_var.get())
+                mode_name = "interesting moments"
+            else:
+                moments = detect_loud_moments(
+                    self.video_path,
+                    chunk_duration=self.duration_var.get(),
+                    threshold_db=self.threshold_var.get()
+                )
+                mode_name = "loud moments"
+
+            if not moments:
+                messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
                 return
 
             # Show preview window
@@ -346,7 +907,7 @@
             preview_window.title("Preview and Select Clips")
             preview_window.geometry("500x400")
 
-            tk.Label(preview_window, text=f"Found {len(loud_moments)} loud moments:", font=("Arial", 12, "bold")).pack(pady=10)
+            tk.Label(preview_window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)
 
             # Create scrollable frame for checkboxes
             canvas = tk.Canvas(preview_window)
@@ -364,7 +925,7 @@
             # Store checkbox variables and clip data
             self.clip_vars = []
             # Use all clips if max clips is disabled, otherwise limit by setting
-            clips_to_show = loud_moments if not self.use_max_clips.get() else loud_moments[:self.clips_var.get()]
+            clips_to_show = moments if not self.use_max_clips.get() else moments[:self.clips_var.get()]
            self.preview_clips_data = clips_to_show
 
             # Add selectable clips with checkboxes
@@ -510,11 +1071,12 @@
 
             generate_shorts(
                 self.video_path,
-                max_clips=self.clips_var.get() if self.use_max_clips.get() else len(detect_loud_moments(self.video_path, chunk_duration=self.duration_var.get(), threshold_db=self.threshold_var.get())),
+                max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,  # Default max for non-loud modes
                 output_folder=self.output_folder,
                 progress_callback=self.update_progress,
                 threshold_db=self.threshold_var.get(),
-                clip_duration=self.duration_var.get()
+                clip_duration=self.duration_var.get(),
+                detection_mode=self.detection_mode_var.get()
             )
             messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
         except FileNotFoundError as e:
@@ -547,18 +1109,14 @@ def run_gui():
 
 if __name__ == "__main__":
     import sys
-    if len(sys.argv) > 1 and sys.argv[1] == "--gui":
-        # Run GUI mode
-        run_gui()
-    elif len(sys.argv) < 2:
-        print("Usage: python shorts_generator2.py your_video.mp4")
-        print("   or: python shorts_generator2.py --gui")
-        run_gui()  # Default to GUI if no args
-    else:
+    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
         # Run command line mode
         try:
             generate_shorts(sys.argv[1])
             print("✅ Shorts generation completed successfully!")
         except Exception as e:
             print(f"❌ Error: {str(e)}")
+    else:
+        # Run GUI mode (default)
+        run_gui()
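
[Editor note, not part of the commit] The rewritten entry point drops the old usage message: any first argument other than --gui is now treated as a video path, and every other invocation opens the GUI, so all three of these work:

    python shorts_generator2.py your_video.mp4    (command-line generation)
    python shorts_generator2.py --gui             (GUI, explicit)
    python shorts_generator2.py                   (GUI, default)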