# ShortGenerator/shorts_generator2.py
#
# 2111 lines
# 89 KiB
# Python

import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from moviepy.video.fx import FadeIn, FadeOut, Resize
from moviepy.audio.fx import MultiplyVolume
from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
import cv2
from scipy import signal
import librosa
import glob
import json
from datetime import datetime
class ToolTip:
    """Hover tooltip attached to a single widget.

    The tip is shown when the pointer enters the widget and removed when it
    leaves.  With ``side='right'`` the tip is placed beside the widget; any
    other value places it just below the widget.
    """

    def __init__(self, widget, text='widget info', side='right'):
        self.widget = widget
        self.text = text
        self.side = side
        for sequence, handler in (("<Enter>", self.enter), ("<Leave>", self.leave)):
            self.widget.bind(sequence, handler)
        self.tipwindow = None

    def enter(self, event=None):
        """Pointer entered the widget: display the tip."""
        self.showtip()

    def leave(self, event=None):
        """Pointer left the widget: remove the tip."""
        self.hidetip()

    def showtip(self):
        """Create the borderless tip window unless one is already up or the text is empty."""
        if self.tipwindow or not self.text:
            return

        # Screen-space geometry of the owning widget.
        root_x = self.widget.winfo_rootx()
        root_y = self.widget.winfo_rooty()
        width = self.widget.winfo_width()
        height = self.widget.winfo_height()

        if self.side == 'right':
            # 10px to the right of the widget, top edges aligned.
            tip_x, tip_y = root_x + width + 10, root_y
        else:
            # Slightly indented, 5px below the widget.
            tip_x, tip_y = root_x + 25, root_y + height + 5

        popup = tk.Toplevel(self.widget)
        self.tipwindow = popup
        popup.wm_overrideredirect(True)
        popup.wm_geometry("+%d+%d" % (tip_x, tip_y))
        caption = tk.Label(popup, text=self.text, justify=tk.LEFT,
                           background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                           font=("Arial", "9", "normal"), wraplength=350)
        caption.pack(ipadx=5, ipady=3)

    def hidetip(self):
        """Destroy the tip window, if any, and forget it."""
        popup, self.tipwindow = self.tipwindow, None
        if popup:
            popup.destroy()
class ComboboxTooltip:
    """Per-item tooltip for a combobox drop-down list.

    ``descriptions`` maps a combobox value to the text shown while the pointer
    hovers over that value in the drop-down listbox.  The listbox only exists
    once the drop-down has been opened, so it is looked up shortly after every
    click or key press on the combobox.
    """

    def __init__(self, combobox, descriptions):
        self.combobox = combobox
        self.descriptions = descriptions
        self.tipwindow = None
        self.bound_listbox = None
        # The drop-down is created lazily; re-scan for it after user input.
        self.combobox.bind("<Button-1>", self.on_click)
        self.combobox.bind("<KeyPress>", self.on_keypress)

    def on_click(self, event):
        """Schedule a listbox lookup once the drop-down had time to open."""
        self.combobox.after(50, self.bind_listbox)

    def on_keypress(self, event):
        """Schedule a listbox lookup after keyboard navigation."""
        self.combobox.after(50, self.bind_listbox)

    @staticmethod
    def _first_listbox(container):
        """Return the first direct child of *container* whose class is 'Listbox', else None."""
        for child in container.winfo_children():
            if child.winfo_class() == 'Listbox':
                return child
        return None

    def _find_in_toplevels(self):
        """Search Toplevel children of the combobox's window for a listbox."""
        for window in self.combobox.winfo_toplevel().winfo_children():
            if window.winfo_class() == 'Toplevel':
                listbox = self._first_listbox(window)
                if listbox is not None:
                    return listbox
        return None

    def _find_in_popdown(self):
        """Search children of the root window whose path contains 'popdown'."""
        interp = self.combobox.tk
        # splitlist copes with the result being either a Tcl list string or a tuple.
        for path in interp.splitlist(interp.call('winfo', 'children', '.')):
            if 'popdown' in str(path):
                listbox = self._first_listbox(self.combobox.nametowidget(str(path)))
                if listbox is not None:
                    return listbox
        return None

    def _attach(self, listbox):
        """Bind the hover handlers to *listbox* unless it is already bound."""
        if self.bound_listbox != listbox:
            self.bound_listbox = listbox
            listbox.bind("<Motion>", self.on_listbox_motion)
            listbox.bind("<Leave>", self.on_listbox_leave)
            listbox.bind("<ButtonRelease-1>", self.on_listbox_leave)

    def bind_listbox(self):
        """Locate the drop-down listbox and hook the tooltip handlers onto it.

        Both lookup strategies are tried in order.  Previously the second one
        ran only if the first raised, so a plain "not found" never reached it.
        Lookup stays best-effort: a failure simply means no tooltip.
        """
        # NOTE(review): the ttk popdown is created on the Tcl side and may not
        # be visible through winfo_children()/nametowidget() — verify on the
        # target Tk version that one of these strategies actually finds it.
        for finder in (self._find_in_toplevels, self._find_in_popdown):
            try:
                listbox = finder()
                if listbox is not None:
                    self._attach(listbox)
                    return
            except (tk.TclError, KeyError, AttributeError):
                continue

    def on_listbox_motion(self, event):
        """Show the description of the item currently under the pointer."""
        try:
            listbox = event.widget
            index = listbox.nearest(event.y)
            values = self.combobox['values']
            if 0 <= index < len(values):
                selection = values[index]
                if selection in self.descriptions:
                    self.show_tooltip(event, self.descriptions[selection])
        except tk.TclError:
            # The drop-down can disappear between the event and its handling.
            pass

    def on_listbox_leave(self, event):
        """Pointer left the list or an item was chosen: remove the tooltip."""
        self.hide_tooltip()

    def show_tooltip(self, event, text):
        """Replace any visible tooltip with one showing *text* beside the listbox."""
        self.hide_tooltip()
        try:
            x = event.widget.winfo_rootx() + event.widget.winfo_width() + 10
            y = event.widget.winfo_rooty() + event.y - 20
            self.tipwindow = tw = tk.Toplevel(event.widget)
            tw.wm_overrideredirect(True)
            tw.wm_geometry("+%d+%d" % (x, y))
            label = tk.Label(tw, text=text, justify=tk.LEFT,
                             background="#ffffe0", relief=tk.SOLID, borderwidth=1,
                             font=("Arial", "9", "normal"), wraplength=350)
            label.pack(ipadx=5, ipady=3)
        except (tk.TclError, AttributeError):
            # Tooltips are cosmetic; never let them break the drop-down.
            pass

    def hide_tooltip(self):
        """Destroy the tooltip window if one is showing."""
        if self.tipwindow:
            try:
                self.tipwindow.destroy()
            except tk.TclError:
                # Already destroyed together with its parent.
                pass
            self.tipwindow = None
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    """Return (start, end) windows whose average audio level exceeds a threshold.

    The audio track is split into consecutive chunks of ``chunk_duration``
    seconds.  For each chunk the mean per-sample magnitude is converted to
    decibels (``20 * log10``) and compared with ``threshold_db``.

    Args:
        video_path: Path of the video to analyse.
        chunk_duration: Chunk length in seconds; must yield at least one sample.
        threshold_db: Minimum chunk level, in dB, to count as loud.

    Returns:
        List of ``(start_seconds, end_seconds)`` tuples in chronological order;
        empty when the video has no audio track.

    Raises:
        ValueError: If ``chunk_duration`` is too small to contain one sample.
    """
    # NOTE(review): if the decoded samples are normalised to [-1, 1] the level
    # can barely exceed 0 dB, so the default threshold of 10 would never match
    # — confirm against the audio backend (generate_shorts passes -30).
    print("🔍 Analyzing audio...")
    sample_rate = 44100
    chunk_size = int(chunk_duration * sample_rate)
    if chunk_size <= 0:
        raise ValueError(f"chunk_duration must be positive, got {chunk_duration}")

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            print("⚠️ Video has no audio track; no loud moments detected")
            return []
        audio = np.asarray(clip.audio.to_soundarray(fps=sample_rate))
        duration = clip.duration
    finally:
        # Release the reader even when decoding fails.
        clip.close()

    # One magnitude per sample: channel-wise norm for multi-channel audio,
    # absolute value when the decoder hands back a flat (mono) array.
    volume = np.abs(audio) if audio.ndim == 1 else np.linalg.norm(audio, axis=1)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        # The epsilon keeps log10 finite for digital silence.
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / sample_rate
            loud_chunks.append((start, min(start + chunk_duration, duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    return loud_chunks
def detect_scene_changes(video_path, chunk_duration=5, threshold=0.3):
    """Detect dramatic visual changes between frames sampled two seconds apart.

    Each sampled frame is reduced to a 160x90 grayscale thumbnail.  The mean
    absolute pixel difference between consecutive thumbnails, scaled to
    ``[0, 1]``, is compared with ``threshold``.

    Args:
        video_path: Path of the video to analyse.
        chunk_duration: Length in seconds of the window reported around a change.
        threshold: Minimum normalised difference that counts as a scene change.

    Returns:
        List of ``(start_seconds, end_seconds)`` windows centred on the earlier
        of the two differing frames and clamped to the video bounds.
    """
    print("🎬 Analyzing scene changes...")
    sample_rate = 2  # Check every 2 seconds
    half_window = chunk_duration / 2
    scene_changes = []

    clip = VideoFileClip(video_path)
    try:
        duration = clip.duration
        times = np.arange(0, duration, sample_rate)

        def thumbnail(t):
            """Return the frame at time t as a small float grayscale image."""
            gray = cv2.cvtColor(clip.get_frame(t), cv2.COLOR_RGB2GRAY)
            return cv2.resize(gray, (160, 90)).astype(float)  # Small size for speed

        # Thumbnail for times[i], carried over from the previous iteration so
        # every frame is decoded only once.
        carried = None
        for i, t in enumerate(times[:-1]):
            try:
                current = carried if carried is not None else thumbnail(t)
                following = thumbnail(times[i + 1])
            except Exception as e:
                # Best effort: a single undecodable frame must not abort the scan.
                print(f"⚠️ Frame analysis error at {t:.1f}s: {e}")
                carried = None
                continue
            carried = following

            diff = np.mean(np.abs(current - following)) / 255.0
            if diff > threshold:
                start = max(0, t - half_window)
                end = min(duration, t + half_window)
                scene_changes.append((start, end))
    finally:
        clip.close()

    print(f"🎬 Found {len(scene_changes)} scene changes")
    return scene_changes
def detect_motion_intensity(video_path, chunk_duration=5, threshold=0.15):
    """Detect high-motion moments with sparse Lucas-Kanade optical flow.

    Once per second, two 160x90 grayscale thumbnails taken half a second apart
    are compared.  Corner features found in the first thumbnail are tracked
    into the second, and the mean displacement of the successfully tracked
    points is the motion score.

    The previous implementation measured the tracked points' *positions*
    rather than their displacement, and seeded points outside the 90-pixel
    image height, so nearly every second passed the threshold.

    Args:
        video_path: Path of the video to analyse.
        chunk_duration: Length in seconds of the window reported around a hit.
        threshold: Minimum mean displacement, in thumbnail pixels per half
            second, that counts as high motion.

    Returns:
        List of ``(start_seconds, end_seconds)`` windows clamped to the video.
    """
    print("🏃 Analyzing motion intensity...")
    sample_rate = 1  # Check every second
    half_window = chunk_duration / 2
    motion_moments = []

    clip = VideoFileClip(video_path)
    try:
        duration = clip.duration

        def thumbnail(t):
            """Return the frame at time t as a small grayscale image."""
            gray = cv2.cvtColor(clip.get_frame(t), cv2.COLOR_RGB2GRAY)
            return cv2.resize(gray, (160, 90))

        for t in np.arange(0, duration - 1, sample_rate):
            try:
                gray1 = thumbnail(t)
                gray2 = thumbnail(t + 0.5)  # Half second later

                # Track real image features instead of random coordinates.
                points = cv2.goodFeaturesToTrack(
                    gray1, maxCorners=100, qualityLevel=0.01, minDistance=5)
                if points is None:
                    continue  # Featureless frame (e.g. solid colour)

                next_points, status, _err = cv2.calcOpticalFlowPyrLK(
                    gray1, gray2, points, None)
                if next_points is None or status is None:
                    continue

                tracked = status.reshape(-1) == 1
                if not tracked.any():
                    continue

                # Flow is the displacement between matched points.
                displacement = (next_points - points).reshape(-1, 2)[tracked]
                motion_magnitude = float(np.mean(np.linalg.norm(displacement, axis=1)))
                if motion_magnitude > threshold:
                    start = max(0, t - half_window)
                    end = min(duration, t + half_window)
                    motion_moments.append((start, end))
            except Exception as e:
                # Best effort: skip samples that cannot be decoded or tracked.
                print(f"⚠️ Motion analysis error at {t:.1f}s: {e}")
                continue
    finally:
        clip.close()

    print(f"🏃 Found {len(motion_moments)} high-motion moments")
    return motion_moments
def detect_speech_emotion(video_path, chunk_duration=5):
    """Detect emotional/excited speech from the Whisper transcript.

    A transcript segment is selected when it contains an excitement keyword as
    a whole word or phrase, contains ``!`` or ``?``, or is short (five words or
    fewer) and contains a keyword or a fully upper-case word longer than one
    letter.

    Args:
        video_path: Path of the media file to transcribe.
        chunk_duration: Seconds of padding split evenly before and after the
            selected segment.

    Returns:
        List of ``(start_seconds, end_seconds)`` windows.  The end is clamped
        to the media duration when the transcriber reports one.
    """
    print("😄 Analyzing speech emotions...")
    # Use Whisper to get detailed speech analysis
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, info = model.transcribe(video_path, beam_size=5, vad_filter=True, word_timestamps=True)
    media_duration = getattr(info, "duration", None)
    half_window = chunk_duration / 2

    excitement_keywords = ['wow', 'amazing', 'incredible', 'unbelievable', 'awesome', 'fantastic',
                           'omg', 'what', 'no way', 'crazy', 'insane', 'perfect', 'yes', 'exactly']
    emotional_moments = []

    for segment in segments:
        raw_text = segment.text
        raw_words = raw_text.split()

        # Lower-case, replace punctuation with spaces and pad with spaces so a
        # keyword only matches whole words ('what' no longer hits 'whatever').
        letters_only = ''.join(ch if ch.isalnum() else ' ' for ch in raw_text.lower())
        padded = ' ' + ' '.join(letters_only.split()) + ' '
        has_keywords = any(f' {keyword} ' in padded for keyword in excitement_keywords)

        # Single capital letters such as "I" or "A" are not shouting.
        has_caps = any(len(word) > 1 and word.isupper() for word in raw_words)
        has_punctuation = '!' in raw_text or '?' in raw_text
        is_short_excited = len(raw_words) <= 5 and (has_keywords or has_caps)

        if has_keywords or has_punctuation or is_short_excited:
            start = max(0, segment.start - half_window)
            end = segment.end + half_window
            if media_duration is not None:
                end = min(end, media_duration)
            emotional_moments.append((start, end))

    print(f"😄 Found {len(emotional_moments)} emotional speech moments")
    return emotional_moments
def detect_audio_peaks(video_path, chunk_duration=5):
    """Detect sudden audio peaks (bass drops, beats, impacts).

    The audio is decoded at 22050 Hz and mixed down to mono.  Local maxima of
    the absolute waveform at or above its 95th percentile are candidate peaks.
    A candidate is kept only if it lies more than ``chunk_duration`` seconds
    after the previously kept one.

    Args:
        video_path: Path of the video to analyse.
        chunk_duration: Minimum spacing between kept peaks, and the length of
            the window reported around each of them, in seconds.

    Returns:
        List of ``(start_seconds, end_seconds)`` windows clamped to the video;
        empty when the video has no audio.
    """
    print("🎵 Analyzing audio peaks...")
    sample_rate = 22050  # Lower sample rate for speed

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            print("⚠️ Video has no audio track; no audio peaks detected")
            return []
        audio = np.asarray(clip.audio.to_soundarray(fps=sample_rate))
        duration = clip.duration
    finally:
        clip.close()

    # Convert to mono if stereo
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    magnitude = np.abs(audio)
    if magnitude.size == 0:
        print("🎵 Found 0 audio peak moments")
        return []

    peaks, _ = signal.find_peaks(magnitude, height=np.percentile(magnitude, 95))

    half_window = chunk_duration / 2
    peak_moments = []
    # Start at -inf so a peak within the first chunk_duration seconds is not
    # rejected just because nothing precedes it.
    prev_peak = -float('inf')
    for peak in peaks:
        peak_time = peak / sample_rate
        # Avoid too close peaks
        if peak_time - prev_peak > chunk_duration:
            start = max(0, peak_time - half_window)
            end = min(duration, peak_time + half_window)
            peak_moments.append((start, end))
            prev_peak = peak_time

    print(f"🎵 Found {len(peak_moments)} audio peak moments")
    return peak_moments
def detect_combined_intensity(video_path, chunk_duration=5, weights=None):
    """Combine all detectors and return the highest-scoring moments.

    Every detector votes for the whole seconds covered by its windows, each
    vote worth that detector's weight.  Seconds are then visited from highest
    to lowest score.  A second scoring above 0.3 becomes the centre of a
    ``chunk_duration`` window and suppresses its neighbours within
    ``chunk_duration`` seconds.

    Args:
        video_path: Path of the video to analyse.
        chunk_duration: Window length in seconds (int or float).
        weights: Optional mapping with any of the keys ``'loud'``, ``'scene'``,
            ``'motion'``, ``'speech'``, ``'peaks'``; missing keys keep their
            default weight.

    Returns:
        List of ``(start_seconds, end_seconds)`` windows ordered by descending
        score.
    """
    print("🎯 Running comprehensive moment analysis...")
    default_weights = {'loud': 0.3, 'scene': 0.2, 'motion': 0.2, 'speech': 0.2, 'peaks': 0.1}
    # Merge so a partial override no longer raises KeyError.
    weights = {**default_weights, **(weights or {})}

    # Get all detection results
    loud_moments = detect_loud_moments(video_path, chunk_duration, threshold_db=5)  # Lower threshold
    scene_moments = detect_scene_changes(video_path, chunk_duration)
    motion_moments = detect_motion_intensity(video_path, chunk_duration)
    speech_moments = detect_speech_emotion(video_path, chunk_duration)
    peak_moments = detect_audio_peaks(video_path, chunk_duration)

    clip = VideoFileClip(video_path)
    try:
        duration = clip.duration
    finally:
        clip.close()

    # Score each second of the video
    time_scores = {}
    weighted_sources = (
        (loud_moments, weights['loud']),
        (scene_moments, weights['scene']),
        (motion_moments, weights['motion']),
        (speech_moments, weights['speech']),
        (peak_moments, weights['peaks']),
    )
    for moments, weight in weighted_sources:
        for start, end in moments:
            for t in range(int(start), int(end) + 1):
                time_scores[t] = time_scores.get(t, 0) + weight

    if not time_scores:
        return loud_moments  # Fallback to loud moments

    half_window = chunk_duration / 2
    # range() needs integers; round up so a fractional chunk_duration still
    # suppresses the whole neighbourhood.
    suppress_radius = int(np.ceil(chunk_duration))

    combined_moments = []
    used_times = set()
    for time_sec, score in sorted(time_scores.items(), key=lambda item: item[1], reverse=True):
        if score <= 0.3:  # Minimum threshold; everything after is lower still
            break
        if time_sec in used_times:
            continue
        start = max(0, time_sec - half_window)
        end = min(duration, time_sec + half_window)
        combined_moments.append((start, end))
        # Mark nearby times as used to avoid overlap
        used_times.update(range(max(0, time_sec - suppress_radius),
                                min(int(duration), time_sec + suppress_radius)))

    print(f"🎯 Found {len(combined_moments)} high-intensity combined moments")
    return combined_moments
def transcribe_and_extract_subtitles(video_path, start, end):
    """Transcribe the video and return the subtitles overlapping [start, end].

    Args:
        video_path: Path of the media file to transcribe.
        start: Clip start in seconds on the source timeline.
        end: Clip end in seconds on the source timeline.

    Returns:
        List of ``(relative_start, relative_end, text)`` tuples whose times are
        measured from ``start``.  A segment already in progress at ``start``
        is included with its start clamped to 0; ``relative_end`` may extend
        past the clip and is left for the caller to clamp.
    """
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if segment.start >= end:
            # Segments are produced lazily in chronological order, so stopping
            # here avoids decoding the rest of the file.
            break
        if segment.end <= start:
            continue
        relative_start = max(0.0, segment.start - start)
        subtitles.append((relative_start, segment.end - start, segment.text))
    return subtitles
def _subtitle_overlays(text, subtitle_start, subtitle_end, subtitle_y_px, frame_width):
    """Build the text overlays (base lines plus per-word highlights) for one subtitle."""
    words = text.strip().split()
    if not words:
        return []

    # Split into small readable chunks: two words each, or fewer when a single
    # word is already longer than 25 characters.
    chunks = []
    current_chunk = []
    for word in words:
        current_chunk.append(word)
        if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
            chunks.append(' '.join(current_chunk))
            current_chunk = []
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
    overlays = []
    for chunk_idx, chunk_text in enumerate(chunks):
        chunk_start = subtitle_start + (chunk_idx * chunk_duration)
        chunk_end = min(chunk_start + chunk_duration, subtitle_end)
        chunk_words = chunk_text.split()

        # Base subtitle, centred horizontally.
        base_subtitle = TextClip(
            text=chunk_text.upper(),
            font_size=65,
            color='white',
            stroke_color='black',
            stroke_width=5
        )
        text_width, _ = base_subtitle.size
        overlays.append(
            base_subtitle.with_start(chunk_start).with_end(chunk_end)
            .with_position(('center', subtitle_y_px)))

        # Highlighted words, laid out left to right starting at the left edge
        # of the centred base line.  Each word is lit for 80% of its time slot.
        word_duration = chunk_duration / len(chunk_words)
        current_x = (frame_width / 2) - (text_width / 2)
        for i, word in enumerate(chunk_words):
            word_start = chunk_start + (i * word_duration)
            word_end = min(word_start + word_duration * 0.8, chunk_end)
            highlighted_word = TextClip(
                text=word.upper(),
                font_size=68,
                color='#FFD700',
                stroke_color='#FF6B35',
                stroke_width=5
            )
            word_width, _ = highlighted_word.size
            # The position is the clip's left edge, so the running x is used
            # directly instead of a width-dependent fudge offset.
            overlays.append(
                highlighted_word.with_start(word_start).with_end(word_end)
                .with_position((current_x, subtitle_y_px)))
            current_x += word_width + 20  # Add spacing between words
    return overlays


def create_short_clip(video_path, start, end, subtitles, output_path):
    """Render a 1080x1920 vertical short with word-highlighted subtitles.

    The ``[start, end]`` section of the source is scaled to a height of 1920
    and cropped to the central 1080 columns.  Each subtitle is drawn at a
    fixed vertical position; a subtitle that fails to render is skipped with a
    warning and does not abort the clip.

    Args:
        video_path: Source video path.
        start: Section start in seconds.
        end: Section end in seconds.
        subtitles: Iterable of ``(start, end, text)`` with times relative to
            the section start.
        output_path: Destination file, encoded as H.264 video with AAC audio.
    """
    print(f"🎬 Creating short: {output_path}")
    frame_width, frame_height = 1080, 1920
    subtitle_y_px = 1550  # Fixed Y position for subtitles

    source = VideoFileClip(video_path)
    overlays = []
    final = None
    try:
        clip = source.subclipped(start, end)
        video_duration = clip.duration
        print(f"📏 Video clip duration: {video_duration:.2f}s")

        # Centre the crop on the *resized* frame; the original width belongs
        # to a different coordinate system once the clip has been scaled.
        resized = clip.resized(height=frame_height)
        vertical_clip = resized.cropped(width=frame_width, x_center=resized.w / 2)

        for (sub_start, sub_end, text) in subtitles:
            try:
                subtitle_start = max(0, sub_start)
                subtitle_end = min(sub_end, video_duration)
                if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                    print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                    continue
                new_overlays = _subtitle_overlays(
                    text, subtitle_start, subtitle_end, subtitle_y_px, frame_width)
                if not new_overlays:
                    continue
                overlays.extend(new_overlays)
                print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
            except Exception as exc:
                # Best effort: one bad subtitle must not lose the whole short.
                print(f"⚠️ Subtitle error: {exc}, skipping subtitle: {text[:50]}...")
                continue

        final = CompositeVideoClip([vertical_clip, *overlays], size=(frame_width, frame_height))
        final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    finally:
        # Release readers and text clips even when rendering fails.
        for overlay in overlays:
            overlay.close()
        if final is not None:
            final.close()
        source.close()
def validate_video(video_path, min_duration=30):
    """Validate a video file and return its duration in seconds.

    Args:
        video_path: Path of the video to check.
        min_duration: Minimum acceptable duration in seconds.

    Returns:
        The duration reported for the clip.

    Raises:
        FileNotFoundError: If ``video_path`` is not an existing file.
        ValueError: If the file cannot be read as a video, or if it is shorter
            than ``min_duration``.
    """
    # Check the path up front instead of guessing from the decoder's message.
    if not os.path.isfile(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    try:
        clip = VideoFileClip(video_path)
        try:
            duration = clip.duration
        finally:
            clip.close()
    except Exception as e:
        message = str(e)
        if "No such file" in message:
            raise FileNotFoundError(f"Video file not found: {video_path}") from e
        if "could not open" in message.lower():
            raise ValueError(f"Invalid or corrupted video file: {video_path}") from e
        raise ValueError(f"Error reading video: {message}") from e

    # Kept outside the try block so this message is not re-wrapped as a
    # generic "Error reading video" failure.
    if duration < min_duration:
        raise ValueError(f"Video is too short ({duration:.1f}s). Minimum {min_duration}s required.")
    return duration
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None,
                    threshold_db=-30, clip_duration=5, detection_mode="loud"):
    """Detect interesting moments in a video and render each as a subtitled short.

    Args:
        video_path: Source video path.
        max_clips: Maximum number of shorts to produce.
        output_folder: Directory for the results (created if missing); files
            are named ``short_<n>.mp4``.
        progress_callback: Optional ``callback(message, percent)`` invoked at
            each stage.  ``percent`` is an int that rises from 0 to 100.
        threshold_db: Loudness threshold, used by the ``"loud"`` mode only.
        clip_duration: Target clip length in seconds; the source must be at
            least twice as long.
        detection_mode: One of ``"loud"``, ``"scene"``, ``"motion"``,
            ``"speech"``, ``"peaks"``, ``"combined"``.  Any other value falls
            back to loudness detection.

    Raises:
        FileNotFoundError, ValueError: Propagated from video validation.
        ValueError: If the selected detector finds no moments.
    """
    def report(message, percent):
        """Forward progress to the optional callback."""
        if progress_callback:
            progress_callback(message, percent)

    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
    try:
        video_duration = validate_video(video_path, min_duration=clip_duration * 2)
    except Exception:
        report(f"❌ Video validation failed", 0)
        raise  # Bare raise keeps the original traceback.
    report(f"✅ Video validated ({video_duration:.1f}s)", 5)

    def detect_loud():
        return detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)

    # Mode -> (progress message, detector).
    detectors = {
        "loud": ("🔍 Analyzing audio for loud moments...", detect_loud),
        "scene": ("🎬 Analyzing scene changes...",
                  lambda: detect_scene_changes(video_path, chunk_duration=clip_duration)),
        "motion": ("🏃 Analyzing motion intensity...",
                   lambda: detect_motion_intensity(video_path, chunk_duration=clip_duration)),
        "speech": ("😄 Analyzing speech emotions...",
                   lambda: detect_speech_emotion(video_path, chunk_duration=clip_duration)),
        "peaks": ("🎵 Analyzing audio peaks...",
                  lambda: detect_audio_peaks(video_path, chunk_duration=clip_duration)),
        "combined": ("🎯 Running comprehensive analysis...",
                     lambda: detect_combined_intensity(video_path, chunk_duration=clip_duration)),
    }
    # Unknown modes silently use loudness detection, without a stage message.
    stage_message, detector = detectors.get(detection_mode, (None, detect_loud))
    if stage_message is not None:
        report(stage_message, 10)
    best_moments = detector()

    selected = best_moments[:max_clips]
    if not selected:
        mode_name = {
            "loud": "loud moments", "scene": "scene changes", "motion": "motion intensity",
            "speech": "emotional speech", "peaks": "audio peaks", "combined": "interesting moments"
        }.get(detection_mode, "moments")
        raise ValueError(f"No {mode_name} found. Try a different detection mode or adjust settings.")

    total = len(selected)
    report(f"📊 Found {total} clips to generate", 20)

    # Spread the remaining 80% evenly over the clips so the value never
    # exceeds 100, however many clips were requested.
    per_clip = 80 / total
    for i, (start, end) in enumerate(selected):
        clip_base = 20 + i * per_clip
        report(f"🗣️ Transcribing clip {i+1}/{total}", int(clip_base))
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        report(f"🎬 Creating video {i+1}/{total}", int(clip_base + per_clip / 2))
        create_short_clip(video_path, start, end, subtitles, out_path)

    report("✅ All shorts generated successfully!", 100)
# Video Editing Tools
class VideoEditor:
    """Professional video editing tools for generated shorts.

    Every operation reads ``video_path``, writes a new H.264/AAC file and
    returns the path it wrote.  Source clips are closed even when rendering
    fails.  When no ``output_path`` is given, one is derived from the input
    name and never equals the input path.
    """

    @staticmethod
    def _default_output_path(video_path, suffix):
        """Return ``<video_path without extension><suffix>.mp4``.

        Only the real extension is touched, so inputs that do not end in
        ``.mp4`` still get a distinct output and directory names are left
        alone.
        """
        root, _ext = os.path.splitext(video_path)
        return f"{root}{suffix}.mp4"

    @staticmethod
    def _close_clips(*clips):
        """Close every clip that was actually created (``None`` entries are skipped)."""
        for clip in clips:
            if clip is not None:
                clip.close()

    @staticmethod
    def _load_overlay_font(image_font, font_size):
        """Return the first available system font, else PIL's built-in default."""
        for font_name in ("arial.ttf", "calibri.ttf", "tahoma.ttf"):
            try:
                return image_font.truetype(font_name, font_size)
            except OSError:
                continue
        print(f"📝 Using default font (system fonts not found)")
        return image_font.load_default()

    @staticmethod
    def trim_video(video_path, start_time, end_time, output_path):
        """Trim video to the ``[start_time, end_time]`` range (seconds); return ``output_path``."""
        clip = VideoFileClip(video_path)
        trimmed = None
        try:
            trimmed = clip.subclipped(start_time, end_time)
            trimmed.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(trimmed, clip)
        return output_path

    @staticmethod
    def adjust_speed(video_path, speed_factor, output_path):
        """Change playback speed (0.5 = half speed, 2.0 = double speed).

        Raises:
            ValueError: If ``speed_factor`` is not positive.
        """
        if speed_factor <= 0:
            raise ValueError(f"speed_factor must be positive, got {speed_factor}")
        clip = VideoFileClip(video_path)
        speeded = None
        try:
            # Changing only the fps re-samples the same timeline; the speed
            # effect rescales time itself.
            speeded = clip.with_speed_scaled(factor=speed_factor)
            speeded.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(speeded, clip)
        return output_path

    @staticmethod
    def add_fade_effects(video_path, fade_in_duration=1.0, fade_out_duration=1.0, output_path=None):
        """Add fade in/out effects; a duration of 0 disables that fade."""
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, '_faded')
        clip = VideoFileClip(video_path)
        final_clip = None
        try:
            effects = []
            if fade_in_duration > 0:
                effects.append(FadeIn(fade_in_duration))
            if fade_out_duration > 0:
                effects.append(FadeOut(fade_out_duration))
            final_clip = clip.with_effects(effects) if effects else clip
            final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(final_clip, clip)
        return output_path

    @staticmethod
    def adjust_volume(video_path, volume_factor, output_path=None):
        """Adjust audio volume (1.0 = normal, 0.5 = half volume, 2.0 = double volume).

        A video without an audio track is re-encoded unchanged.
        """
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, '_volume_adjusted')
        clip = VideoFileClip(video_path)
        final_clip = None
        try:
            if clip.audio:
                audio_adjusted = clip.audio.with_effects([MultiplyVolume(volume_factor)])
                final_clip = clip.with_audio(audio_adjusted)
            else:
                final_clip = clip
            final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(final_clip, clip)
        return output_path

    @staticmethod
    def resize_video(video_path, width, height, output_path=None):
        """Resize video to exactly ``width`` x ``height`` pixels."""
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, f'_resized_{width}x{height}')
        clip = VideoFileClip(video_path)
        resized = None
        try:
            resized = clip.resized((width, height))
            resized.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(resized, clip)
        return output_path

    @staticmethod
    def crop_video(video_path, x1, y1, x2, y2, output_path=None):
        """Crop video to the rectangle with corners ``(x1, y1)`` and ``(x2, y2)``."""
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, '_cropped')
        clip = VideoFileClip(video_path)
        cropped = None
        try:
            cropped = clip.cropped(x1=x1, y1=y1, x2=x2, y2=y2)
            cropped.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            VideoEditor._close_clips(cropped, clip)
        return output_path

    @staticmethod
    def add_text_overlay(video_path, text, position=('center', 'bottom'),
                         duration=None, start_time=0, font_size=50, color='white', output_path=None):
        """Add a text overlay to the video using MoviePy.

        Args:
            video_path: Source video path.
            text: Text to draw.
            position: Position passed to the text clip.
            duration: Seconds the text stays visible; defaults to the rest of
                the video after ``start_time``.
            start_time: Second at which the text appears.
            font_size: Font size of the text.
            color: Text colour; a black outline is always added.
            output_path: Destination; defaults to ``<input>_with_text.mp4``.

        Returns:
            The path of the written file.
        """
        print(f"🎬 Adding text overlay: '{text}'...")
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, '_with_text')

        clip = VideoFileClip(video_path)
        text_clip = None
        final_video = None
        try:
            if duration is None:
                duration = clip.duration - start_time

            # The text is passed by keyword, matching the other TextClip calls
            # in this file; positionally it is not the first parameter.
            try:
                text_clip = TextClip(
                    text=text,
                    font_size=font_size,
                    color=color,
                    stroke_color='black',
                    stroke_width=2,
                    method='caption',
                    size=(int(clip.w * 0.8), None)  # Limit width to prevent huge text
                )
                print(f"📝 Text clip created successfully...")
            except Exception as e:
                print(f"⚠️ Using fallback text method: {e}")
                # Fallback to basic text creation
                text_clip = TextClip(
                    text=text,
                    font_size=font_size,
                    color=color,
                    stroke_color='black',
                    stroke_width=2
                )

            # Set timing and position
            text_clip = (text_clip.with_start(start_time)
                         .with_end(start_time + duration)
                         .with_position(position))

            print(f"⏱️ Compositing video with text overlay...")
            final_video = CompositeVideoClip([clip, text_clip])

            print(f"💾 Saving video to: {output_path}")
            # 'verbose' is not passed: with the MoviePy 2 API this file uses it
            # only triggered the TypeError fallback, which dropped logger=None.
            final_video.write_videofile(
                output_path,
                codec="libx264",
                audio_codec="aac",
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                logger=None,         # Disable logging for speed
                preset='ultrafast',  # Fastest encoding preset
                threads=4            # Use multiple threads
            )
        finally:
            VideoEditor._close_clips(final_video, text_clip, clip)

        print(f"✅ Text overlay completed!")
        return output_path

    @staticmethod
    def add_text_overlay_fast(video_path, text, position=('center', 'bottom'),
                              font_size=50, color='white', output_path=None):
        """Text overlay drawn per frame with PIL and OpenCV (for simple text only).

        Frames are read with OpenCV, the text is drawn with PIL, and the audio
        of the source is added back with MoviePy afterwards.  If PIL is missing
        or anything fails, the MoviePy-based ``add_text_overlay`` is used.

        Returns:
            The path of the written file.
        """
        if not output_path:
            output_path = VideoEditor._default_output_path(video_path, '_with_text_fast')

        try:
            from PIL import Image, ImageDraw, ImageFont
        except ImportError:
            print(f"⚠️ PIL not available, falling back to MoviePy method...")
            return VideoEditor.add_text_overlay(video_path, text, position,
                                                font_size=font_size, color=color,
                                                output_path=output_path)

        try:
            print(f"🚀 Using fast text overlay method...")
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                fps = cap.get(cv2.CAP_PROP_FPS)
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

                # Anchor point of the text centre; unknown positions use the
                # bottom-centre default.
                anchors = {
                    ('center', 'bottom'): (width // 2, height - 100),
                    ('center', 'top'): (width // 2, 100),
                    ('center', 'center'): (width // 2, height // 2),
                }
                text_x, text_y = anchors.get(tuple(position), (width // 2, height - 100))

                # Font and text extent do not depend on the frame, so resolve
                # them once instead of once per frame.
                font = VideoEditor._load_overlay_font(ImageFont, font_size)
                probe = ImageDraw.Draw(Image.new("RGB", (1, 1)))
                left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
                centered_x = text_x - ((right - left) // 2)
                centered_y = text_y - ((bottom - top) // 2)
                outline_width = 2
                outline_offsets = [
                    (dx, dy)
                    for dx in range(-outline_width, outline_width + 1)
                    for dy in range(-outline_width, outline_width + 1)
                ]

                print(f"📹 Processing {total_frames} frames...")
                frame_count = 0
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # Convert BGR to RGB for PIL
                    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    draw = ImageDraw.Draw(pil_image)
                    # Black copies at small offsets form the outline.
                    for dx, dy in outline_offsets:
                        draw.text((centered_x + dx, centered_y + dy), text, font=font, fill='black')
                    draw.text((centered_x, centered_y), text, font=font, fill=color)
                    # Convert back to BGR for OpenCV
                    out.write(cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR))

                    frame_count += 1
                    # The container may report 0 frames; skip the percentage then.
                    if frame_count % 30 == 0 and total_frames > 0:
                        progress = (frame_count / total_frames) * 100
                        print(f"🎬 Progress: {progress:.1f}%")
            finally:
                cap.release()
                if out is not None:
                    out.release()

            # OpenCV wrote video only; add the source audio back with MoviePy.
            print(f"🔊 Adding audio track...")
            temp_output = VideoEditor._default_output_path(output_path, '_temp')
            video_with_audio = VideoFileClip(video_path)
            video_with_text = None
            final_video = None
            try:
                video_with_text = VideoFileClip(output_path)
                final_video = video_with_text.with_audio(video_with_audio.audio)
                final_video.write_videofile(temp_output, codec="libx264", audio_codec="aac",
                                            logger=None)
            finally:
                VideoEditor._close_clips(final_video, video_with_text, video_with_audio)

            # Swap the files only after every reader is closed; an open file
            # cannot be replaced on Windows.
            os.replace(temp_output, output_path)
            print(f"✅ Fast text overlay completed!")
            return output_path
        except Exception as e:
            print(f"⚠️ Fast method failed ({e}), falling back to MoviePy...")
            return VideoEditor.add_text_overlay(video_path, text, position,
                                                font_size=font_size, color=color,
                                                output_path=output_path)

    @staticmethod
    def get_video_info(video_path):
        """Return ``duration``, ``fps``, ``size`` and ``has_audio`` for a video file."""
        clip = VideoFileClip(video_path)
        try:
            return {
                'duration': clip.duration,
                'fps': clip.fps,
                'size': clip.size,
                'has_audio': clip.audio is not None
            }
        finally:
            clip.close()
# Post-Generation Editing Interface
class ShortsEditorGUI:
"""Interface for editing generated shorts"""
def __init__(self, parent, shorts_folder="shorts"):
self.parent = parent
self.shorts_folder = shorts_folder
self.current_video = None
self.video_info = None
self.editor_window = None
def open_editor(self):
    """Open the editor window listing the ``*.mp4`` files of the shorts folder.

    When the folder holds no such file an information dialog is shown and no
    window is created.
    """
    pattern = os.path.join(self.shorts_folder, "*.mp4")
    shorts_files = glob.glob(pattern)

    if shorts_files:
        # Build the editor as a transient child of the parent window.
        window = tk.Toplevel(self.parent)
        self.editor_window = window
        window.title("🎬 Shorts Editor - Professional Video Editing")
        window.geometry("800x700")
        window.transient(self.parent)
        self.create_editor_interface(shorts_files)
    else:
        messagebox.showinfo(
            "No Shorts Found",
            f"No video files found in '{self.shorts_folder}' folder.\nGenerate some shorts first!")
def create_editor_interface(self, shorts_files):
    """Build the editor window: title, video list, info panel, tools and actions.

    Args:
        shorts_files: Paths of the videos to offer.  Files whose metadata
            cannot be read are reported on stdout and left out of the list.
    """
    # Title
    title_frame = tk.Frame(self.editor_window)
    title_frame.pack(fill="x", padx=20, pady=10)
    tk.Label(title_frame, text="🎬 Professional Shorts Editor",
             font=("Arial", 16, "bold")).pack()
    tk.Label(title_frame, text="Select and edit your generated shorts with professional tools",
             font=("Arial", 10), fg="gray").pack()

    # Video selection frame
    selection_frame = tk.LabelFrame(self.editor_window, text="📁 Select Short to Edit", padx=10, pady=10)
    selection_frame.pack(fill="x", padx=20, pady=10)

    # Video list with preview info
    list_frame = tk.Frame(selection_frame)
    list_frame.pack(fill="x")
    tk.Label(list_frame, text="Available Shorts:", font=("Arial", 10, "bold")).pack(anchor="w")

    # Listbox with scrollbar
    list_container = tk.Frame(list_frame)
    list_container.pack(fill="x", pady=5)
    self.video_listbox = tk.Listbox(list_container, height=4, font=("Courier", 9))
    scrollbar = tk.Scrollbar(list_container, orient="vertical")
    self.video_listbox.config(yscrollcommand=scrollbar.set)
    scrollbar.config(command=self.video_listbox.yview)
    self.video_listbox.pack(side="left", fill="both", expand=True)
    scrollbar.pack(side="right", fill="y")

    # Populate video list with file info.  self.video_files stays index-aligned
    # with the listbox rows because both are appended together.
    self.video_files = []
    for video_file in sorted(shorts_files):
        try:
            info = VideoEditor.get_video_info(video_file)
            filename = os.path.basename(video_file)
            size_mb = os.path.getsize(video_file) / (1024 * 1024)
            # Every column is separated; previously the resolution ran
            # straight into the file size (e.g. "1080x19203.2MB").
            display_text = (f"{filename:<20} │ {info['duration']:.1f}s │ "
                            f"{info['size'][0]}x{info['size'][1]} │ {size_mb:.1f}MB")
            self.video_listbox.insert(tk.END, display_text)
            self.video_files.append(video_file)
        except Exception as e:
            print(f"Error reading {video_file}: {e}")

    # Video selection handler
    def on_video_select(event):
        selection = self.video_listbox.curselection()
        if not selection:
            return
        chosen = self.video_files[selection[0]]
        try:
            chosen_info = VideoEditor.get_video_info(chosen)
        except Exception as e:
            # The file may have been removed or replaced since it was listed;
            # keep the previous selection rather than a half-updated state.
            messagebox.showerror("Video Error", f"Could not read {chosen}:\n{e}")
            return
        self.current_video = chosen
        self.video_info = chosen_info
        self.update_video_info()
        self.enable_editing_tools()

    self.video_listbox.bind("<<ListboxSelect>>", on_video_select)

    # Current video info
    self.info_frame = tk.LabelFrame(self.editor_window, text="📊 Video Information", padx=10, pady=10)
    self.info_frame.pack(fill="x", padx=20, pady=10)
    self.info_label = tk.Label(self.info_frame, text="Select a video to see details",
                               font=("Courier", 9), justify="left")
    self.info_label.pack(anchor="w")

    # Editing tools frame
    self.tools_frame = tk.LabelFrame(self.editor_window, text="🛠️ Professional Editing Tools", padx=10, pady=10)
    self.tools_frame.pack(fill="both", expand=True, padx=20, pady=10)
    self.create_editing_tools()

    # Output and action buttons
    action_frame = tk.Frame(self.editor_window)
    action_frame.pack(fill="x", padx=20, pady=10)

    # Output folder selection
    output_folder_frame = tk.Frame(action_frame)
    output_folder_frame.pack(fill="x", pady=5)
    tk.Label(output_folder_frame, text="Output Folder:", font=("Arial", 9, "bold")).pack(side="left")
    self.output_folder = tk.StringVar(value=os.path.join(self.shorts_folder, "edited"))
    output_entry = tk.Entry(output_folder_frame, textvariable=self.output_folder, width=40)
    output_entry.pack(side="left", padx=(10, 5))
    tk.Button(output_folder_frame, text="Browse",
              command=self.select_output_folder).pack(side="left")

    # Action buttons
    button_frame = tk.Frame(action_frame)
    button_frame.pack(fill="x", pady=10)
    tk.Button(button_frame, text="🔄 Refresh List",
              command=self.refresh_video_list, bg="#2196F3", fg="white").pack(side="left", padx=5)
    tk.Button(button_frame, text="📂 Open Shorts Folder",
              command=self.open_shorts_folder, bg="#FF9800", fg="white").pack(side="left", padx=5)
    tk.Button(button_frame, text="❌ Close Editor",
              command=self.editor_window.destroy, bg="#F44336", fg="white").pack(side="right", padx=5)
def create_editing_tools(self):
    """Build the tabbed editing panel inside ``self.tools_frame``.

    Four notebook tabs are created (basic editing, effects, transform and
    text overlay). Every control stores its value in a Tk variable kept on
    ``self`` and every action button is wired to the matching editor
    method. The finished panel is disabled via ``disable_editing_tools``.
    """
    tabs = ttk.Notebook(self.tools_frame)
    tabs.pack(fill="both", expand=True)

    # ---- Tab: basic editing (trim + speed) ----
    basic_tab = ttk.Frame(tabs)
    tabs.add(basic_tab, text="✂️ Basic Editing")

    # Trim: start/end spinners plus the action button
    trim_box = tk.LabelFrame(basic_tab, text="✂️ Trim Video", padx=10, pady=5)
    trim_box.pack(fill="x", padx=10, pady=5)
    trim_row = tk.Frame(trim_box)
    trim_row.pack(fill="x")
    tk.Label(trim_row, text="Start:").pack(side="left")
    self.trim_start = tk.DoubleVar(value=0.0)
    tk.Spinbox(
        trim_row, from_=0, to=120, increment=0.1, width=8,
        textvariable=self.trim_start, format="%.1f",
    ).pack(side="left", padx=5)
    tk.Label(trim_row, text="End:").pack(side="left", padx=(10, 0))
    self.trim_end = tk.DoubleVar(value=5.0)
    tk.Spinbox(
        trim_row, from_=0, to=120, increment=0.1, width=8,
        textvariable=self.trim_end, format="%.1f",
    ).pack(side="left", padx=5)
    tk.Button(
        trim_row, text="✂️ Trim Video",
        command=self.trim_video, bg="#4CAF50", fg="white",
    ).pack(side="right", padx=10)

    # Speed: playback-rate multiplier
    speed_box = tk.LabelFrame(basic_tab, text="⚡ Speed Control", padx=10, pady=5)
    speed_box.pack(fill="x", padx=10, pady=5)
    speed_row = tk.Frame(speed_box)
    speed_row.pack(fill="x")
    tk.Label(speed_row, text="Speed:").pack(side="left")
    self.speed_factor = tk.DoubleVar(value=1.0)
    speed_entry = tk.Spinbox(
        speed_row, from_=0.1, to=5.0, increment=0.1, width=8,
        textvariable=self.speed_factor, format="%.1f",
    )
    speed_entry.pack(side="left", padx=5)
    tk.Label(speed_row, text="(0.5=slow, 1.0=normal, 2.0=fast)").pack(side="left", padx=5)
    tk.Button(
        speed_row, text="⚡ Apply Speed",
        command=self.adjust_speed, bg="#FF9800", fg="white",
    ).pack(side="right", padx=10)

    # ---- Tab: effects (fades + volume) ----
    effects_tab = ttk.Frame(tabs)
    tabs.add(effects_tab, text="✨ Effects")

    # Fades: independent fade-in / fade-out lengths
    fade_box = tk.LabelFrame(effects_tab, text="🌅 Fade Effects", padx=10, pady=5)
    fade_box.pack(fill="x", padx=10, pady=5)
    fade_row = tk.Frame(fade_box)
    fade_row.pack(fill="x")
    tk.Label(fade_row, text="Fade In:").pack(side="left")
    self.fade_in = tk.DoubleVar(value=0.5)
    tk.Spinbox(
        fade_row, from_=0, to=5, increment=0.1, width=6,
        textvariable=self.fade_in, format="%.1f",
    ).pack(side="left", padx=5)
    tk.Label(fade_row, text="Fade Out:").pack(side="left", padx=(10, 0))
    self.fade_out = tk.DoubleVar(value=0.5)
    tk.Spinbox(
        fade_row, from_=0, to=5, increment=0.1, width=6,
        textvariable=self.fade_out, format="%.1f",
    ).pack(side="left", padx=5)
    tk.Button(
        fade_row, text="🌅 Add Fades",
        command=self.add_fades, bg="#9C27B0", fg="white",
    ).pack(side="right", padx=10)

    # Volume: gain multiplier
    volume_box = tk.LabelFrame(effects_tab, text="🔊 Volume Control", padx=10, pady=5)
    volume_box.pack(fill="x", padx=10, pady=5)
    volume_row = tk.Frame(volume_box)
    volume_row.pack(fill="x")
    tk.Label(volume_row, text="Volume:").pack(side="left")
    self.volume_factor = tk.DoubleVar(value=1.0)
    tk.Spinbox(
        volume_row, from_=0, to=3, increment=0.1, width=6,
        textvariable=self.volume_factor, format="%.1f",
    ).pack(side="left", padx=5)
    tk.Label(volume_row, text="(0.0=mute, 1.0=normal, 2.0=loud)").pack(side="left", padx=5)
    tk.Button(
        volume_row, text="🔊 Adjust Volume",
        command=self.adjust_volume, bg="#3F51B5", fg="white",
    ).pack(side="right", padx=10)

    # ---- Tab: transform (resize) ----
    transform_tab = ttk.Frame(tabs)
    tabs.add(transform_tab, text="🔄 Transform")

    resize_box = tk.LabelFrame(transform_tab, text="📐 Resize Video", padx=10, pady=5)
    resize_box.pack(fill="x", padx=10, pady=5)
    resize_row = tk.Frame(resize_box)
    resize_row.pack(fill="x")
    tk.Label(resize_row, text="Width:").pack(side="left")
    self.resize_width = tk.IntVar(value=1080)
    tk.Spinbox(
        resize_row, from_=240, to=4320, increment=120, width=6,
        textvariable=self.resize_width,
    ).pack(side="left", padx=5)
    tk.Label(resize_row, text="Height:").pack(side="left", padx=(10, 0))
    self.resize_height = tk.IntVar(value=1920)
    tk.Spinbox(
        resize_row, from_=240, to=4320, increment=120, width=6,
        textvariable=self.resize_height,
    ).pack(side="left", padx=5)
    tk.Button(
        resize_row, text="📐 Resize",
        command=self.resize_video, bg="#607D8B", fg="white",
    ).pack(side="right", padx=10)

    # ---- Tab: text overlay ----
    text_tab = ttk.Frame(tabs)
    tabs.add(text_tab, text="📝 Text Overlay")
    overlay_box = tk.LabelFrame(text_tab, text="📝 Add Text Overlay", padx=10, pady=5)
    overlay_box.pack(fill="x", padx=10, pady=5)

    # Row 1: the text itself
    text_row = tk.Frame(overlay_box)
    text_row.pack(fill="x", pady=5)
    tk.Label(text_row, text="Text:").pack(side="left")
    self.overlay_text = tk.StringVar(value="Your Text Here")
    tk.Entry(text_row, textvariable=self.overlay_text, width=30).pack(side="left", padx=5)

    # Row 2: font size and anchor position
    settings_row = tk.Frame(overlay_box)
    settings_row.pack(fill="x", pady=5)
    tk.Label(settings_row, text="Size:").pack(side="left")
    self.text_size = tk.IntVar(value=50)
    tk.Spinbox(
        settings_row, from_=20, to=150, width=6,
        textvariable=self.text_size,
    ).pack(side="left", padx=5)
    tk.Label(settings_row, text="Position:").pack(side="left", padx=(10, 0))
    self.text_position = tk.StringVar(value="center,bottom")
    position_choice = ttk.Combobox(
        settings_row, textvariable=self.text_position, width=15,
        values=["center,top", "center,center", "center,bottom",
                "left,top", "right,top", "left,bottom", "right,bottom"],
        state="readonly",
    )
    position_choice.pack(side="left", padx=5)

    # Row 3: rendering method (fast vs. high quality)
    method_row = tk.Frame(overlay_box)
    method_row.pack(fill="x", pady=5)
    tk.Label(method_row, text="Processing Method:", font=("Arial", 9, "bold")).pack(side="left")
    self.text_method = tk.StringVar(value="fast")
    method_choices = tk.Frame(method_row)
    method_choices.pack(side="left", padx=10)
    tk.Radiobutton(
        method_choices, text="🚀 Fast (PIL)", variable=self.text_method,
        value="fast", font=("Arial", 8),
    ).pack(side="left")
    tk.Radiobutton(
        method_choices, text="🎬 High Quality (MoviePy)", variable=self.text_method,
        value="quality", font=("Arial", 8),
    ).pack(side="left", padx=(10, 0))
    method_hint = tk.Label(
        method_row,
        text="Fast: 3-5x faster, basic text | Quality: Slower, advanced effects",
        font=("Arial", 7), fg="gray",
    )
    method_hint.pack(side="right")

    # Row 4: apply button
    apply_row = tk.Frame(overlay_box)
    apply_row.pack(fill="x", pady=5)
    tk.Button(
        apply_row, text="📝 Add Text Overlay",
        command=self.add_text_overlay, bg="#795548", fg="white",
        font=("Arial", 10, "bold"),
    ).pack(side="right", padx=10)

    # Nothing is usable until a video has been selected
    self.disable_editing_tools()
def disable_editing_tools(self):
    """Put every widget under ``self.tools_frame`` into the "disabled" state."""
    top_level_widgets = self.tools_frame.winfo_children()
    for tool in top_level_widgets:
        # set_widget_state walks down into each widget's children itself
        self.set_widget_state(tool, "disabled")
def enable_editing_tools(self):
    """Switch every widget under ``self.tools_frame`` back to "normal".

    When video metadata is available, the trim end value is also preset to
    the video's duration, capped at 30 seconds.
    """
    top_level_widgets = self.tools_frame.winfo_children()
    for tool in top_level_widgets:
        self.set_widget_state(tool, "normal")

    if not self.video_info:
        return
    suggested_end = min(self.video_info['duration'], 30.0)
    self.trim_end.set(suggested_end)
def set_widget_state(self, widget, state):
    """Recursively apply ``state`` to ``widget`` and all of its descendants.

    Args:
        widget: Widget to update; it must offer ``config`` and
            ``winfo_children``.
        state: Value passed as the ``state`` option (the callers in this
            class use "normal" and "disabled").

    Widgets that reject the ``state`` option are skipped, but their
    children are still visited.
    """
    try:
        widget.config(state=state)
    except tk.TclError:
        # Only the Tk "cannot configure this option" error is ignored; any
        # other exception is a real bug and is allowed to propagate.
        pass
    for child in widget.winfo_children():
        self.set_widget_state(child, state)
def update_video_info(self):
    """Show the selected video's metadata in ``self.info_label``.

    Does nothing unless both ``self.video_info`` and ``self.current_video``
    are set. The file size is read from disk and shown in megabytes.
    """
    if not (self.video_info and self.current_video):
        return

    meta = self.video_info
    summary_lines = [
        f"📁 File: {os.path.basename(self.current_video)}",
        f"⏱️ Duration: {meta['duration']:.2f} seconds",
        f"📐 Resolution: {meta['size'][0]} x {meta['size'][1]}",
        f"🎬 FPS: {meta['fps']:.1f}",
        f"🔊 Audio: {'Yes' if meta['has_audio'] else 'No'}",
        f"💾 Size: {os.path.getsize(self.current_video) / (1024*1024):.1f} MB",
    ]
    self.info_label.config(text="\n".join(summary_lines))
def select_output_folder(self):
    """Ask for a directory and store it as the output folder for edits."""
    chosen_dir = filedialog.askdirectory(title="Select Output Folder")
    if not chosen_dir:
        # Dialog was cancelled: keep the current folder
        return
    self.output_folder.set(chosen_dir)
def refresh_video_list(self):
    """Rebuild the listbox from the ``*.mp4`` files in ``self.shorts_folder``.

    ``self.video_files`` is kept index-aligned with the listbox rows. A file
    whose metadata cannot be read is reported on stdout and left out.
    """
    self.video_listbox.delete(0, tk.END)
    self.video_files.clear()

    pattern = os.path.join(self.shorts_folder, "*.mp4")
    for path in sorted(glob.glob(pattern)):
        try:
            info = VideoEditor.get_video_info(path)
            filename = os.path.basename(path)
            size_mb = os.path.getsize(path) / (1024 * 1024)
            row = f"{filename:<20}{info['duration']:.1f}s │ {info['size'][0]}x{info['size'][1]}{size_mb:.1f}MB"
            self.video_listbox.insert(tk.END, row)
            self.video_files.append(path)
        except Exception as e:
            print(f"Error reading {path}: {e}")
def open_shorts_folder(self):
    """Open the shorts folder in the platform's file manager.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. If the file manager cannot be launched, an info dialog
    showing the folder's absolute path is displayed instead.
    """
    import subprocess
    import sys

    folder = os.path.abspath(self.shorts_folder)
    try:
        if sys.platform.startswith("win"):
            # startfile avoids judging success by Explorer's exit status,
            # which is not a reliable success indicator.
            os.startfile(folder)
        elif sys.platform == "darwin":
            subprocess.run(["open", folder], check=True)
        else:
            subprocess.run(["xdg-open", folder], check=True)
    except (OSError, subprocess.SubprocessError):
        # Missing launcher, missing folder, or a failing opener
        messagebox.showinfo("Folder Location", f"Shorts folder: {folder}")
def get_output_path(self, suffix):
    """Return a path for an edited copy of the current video.

    The output folder is created if needed. The file name has the form
    ``<original stem>_<suffix>_<HHMMSS>.mp4``. Returns ``None`` when no
    video is selected.
    """
    source = self.current_video
    if not source:
        return None

    target_dir = self.output_folder.get()
    os.makedirs(target_dir, exist_ok=True)

    stem, _extension = os.path.splitext(os.path.basename(source))
    stamp = datetime.now().strftime("%H%M%S")
    return os.path.join(target_dir, f"{stem}_{suffix}_{stamp}.mp4")
def show_progress_dialog(self, title, operation_func):
    """Run ``operation_func`` in a background thread behind a modal dialog.

    Args:
        title: Used as the window title, as the heading and as the
            operation name reported on success.
        operation_func: Zero-argument callable doing the work. Its return
            value is passed to ``operation_complete``; an exception is
            passed, as text, to ``operation_error``.
    """
    dialog = tk.Toplevel(self.editor_window)
    dialog.title(title)
    dialog.geometry("400x120")
    dialog.transient(self.editor_window)
    dialog.grab_set()

    heading = tk.Label(dialog, text=f"🎬 {title}", font=("Arial", 12, "bold"))
    heading.pack(pady=10)
    status = tk.Label(dialog, text="Processing video...")
    status.pack(pady=5)
    spinner = ttk.Progressbar(dialog, mode="indeterminate")
    spinner.pack(fill="x", padx=20, pady=10)
    spinner.start()

    def worker():
        # Results are handed back through after() rather than touching
        # widgets directly from the worker thread.
        try:
            outcome = operation_func()
            dialog.after(0, lambda r=outcome: self.operation_complete(dialog, r, title))
        except Exception as exc:
            dialog.after(0, lambda msg=str(exc): self.operation_error(dialog, msg))

    background = threading.Thread(target=worker, daemon=True)
    background.start()
def operation_complete(self, progress_window, result, operation_name):
    """Close the progress dialog and report a finished operation.

    When ``result`` is truthy, a success message naming it as the output
    location is shown and the video list is refreshed. A falsy ``result``
    only closes the dialog.
    """
    progress_window.destroy()
    if not result:
        return

    summary = (
        f"{operation_name} completed successfully!\n\n"
        f"Output saved to:\n{result}"
    )
    messagebox.showinfo("Success", summary)
    self.refresh_video_list()
def operation_error(self, progress_window, error_msg):
    """Close the progress dialog and show ``error_msg`` in an error box."""
    progress_window.destroy()
    report = f"❌ Operation failed:\n{error_msg}"
    messagebox.showerror("Error", report)
# Editing tool methods
def trim_video(self):
    """Validate the trim range and trim the selected video in the background.

    Warns and returns without doing anything when no video is selected,
    when start is not before end, or when end exceeds the video duration.
    """
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return

    start, end = self.trim_start.get(), self.trim_end.get()
    if start >= end:
        messagebox.showwarning("Invalid Range", "Start time must be less than end time!")
        return

    total = self.video_info['duration']
    if end > total:
        messagebox.showwarning("Invalid Range", f"End time cannot exceed video duration ({total:.1f}s)!")
        return

    destination = self.get_output_path("trimmed")

    def run_trim():
        VideoEditor.trim_video(self.current_video, start, end, destination)
        return destination

    self.show_progress_dialog("Trimming Video", run_trim)
def adjust_speed(self):
    """Re-time the selected video by the chosen speed factor.

    Warns and returns when no video is selected or the factor is not
    greater than zero; otherwise the work runs behind a progress dialog.
    """
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return

    factor = self.speed_factor.get()
    if factor <= 0:
        messagebox.showwarning("Invalid Speed", "Speed must be greater than 0!")
        return

    destination = self.get_output_path(f"speed_{factor:.1f}x")

    def run_speed_change():
        VideoEditor.adjust_speed(self.current_video, factor, destination)
        return destination

    self.show_progress_dialog("Adjusting Speed", run_speed_change)
def add_fades(self):
    """Apply the configured fade-in and fade-out to the selected video."""
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return

    lead_in, lead_out = self.fade_in.get(), self.fade_out.get()
    destination = self.get_output_path("faded")

    def run_fades():
        return VideoEditor.add_fade_effects(self.current_video, lead_in, lead_out, destination)

    self.show_progress_dialog("Adding Fade Effects", run_fades)
def adjust_volume(self):
    """Scale the selected video's audio by the chosen volume factor.

    Warns and returns when no video is selected or when the video's
    metadata reports no audio track.
    """
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return
    if not self.video_info['has_audio']:
        messagebox.showwarning("No Audio", "Selected video has no audio track!")
        return

    gain = self.volume_factor.get()
    destination = self.get_output_path(f"volume_{gain:.1f}x")

    def run_volume_change():
        return VideoEditor.adjust_volume(self.current_video, gain, destination)

    self.show_progress_dialog("Adjusting Volume", run_volume_change)
def resize_video(self):
    """Resize the selected video to the requested width and height.

    Warns and returns when no video is selected or when either dimension
    is below 1.
    """
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return

    width, height = self.resize_width.get(), self.resize_height.get()
    if width < 1 or height < 1:
        messagebox.showwarning("Invalid Size", "Width and height must be positive!")
        return

    destination = self.get_output_path(f"resized_{width}x{height}")

    def run_resize():
        return VideoEditor.resize_video(self.current_video, width, height, destination)

    self.show_progress_dialog("Resizing Video", run_resize)
def add_text_overlay(self):
    """Render the entered text onto the selected video.

    The position string (for example "center,bottom") is split on the comma
    into a tuple. The "fast" method calls
    ``VideoEditor.add_text_overlay_fast``; any other method value calls
    ``VideoEditor.add_text_overlay``.
    """
    if not self.current_video:
        messagebox.showwarning("No Video", "Please select a video first!")
        return

    text = self.overlay_text.get().strip()
    if not text:
        messagebox.showwarning("No Text", "Please enter text to overlay!")
        return

    position = tuple(self.text_position.get().split(','))
    size = self.text_size.get()
    use_fast = self.text_method.get() == "fast"
    destination = self.get_output_path("with_text")

    def run_overlay():
        # The renderer is looked up here so that only the chosen one is
        # resolved, at the time the operation runs.
        if use_fast:
            render = VideoEditor.add_text_overlay_fast
        else:
            render = VideoEditor.add_text_overlay
        return render(self.current_video, text, position,
                      font_size=size, output_path=destination)

    if use_fast:
        dialog_title = "Adding Text Overlay (Fast Method)"
    else:
        dialog_title = "Adding Text Overlay (High Quality)"
    self.show_progress_dialog(dialog_title, run_overlay)
# GUI Components
class ShortsGeneratorGUI:
def __init__(self, root):
self.root = root
self.root.title("🎬 AI Shorts Generator - Advanced Video Moment Detection")
self.root.geometry("650x650") # Reduced height to eliminate empty space
self.root.minsize(600, 600) # Reduced minimum size
self.video_path = None
self.output_folder = "shorts"
self.max_clips = 3
self.threshold_db = -30
self.clip_duration = 5
self.create_widgets()
def create_widgets(self):
    """Build the main window.

    Creates the file pickers, the settings panel (clip limit, detection
    mode, audio threshold, clip duration), the action buttons and the
    progress area, and attaches tooltips to the interactive controls.
    """
    # Title
    title_label = tk.Label(self.root, text="🎬 AI Shorts Generator", font=("Arial", 16, "bold"))
    title_label.pack(pady=10)

    # Video selection
    video_frame = tk.Frame(self.root)
    video_frame.pack(pady=10, padx=20, fill="x")
    tk.Label(video_frame, text="Select Video File:").pack(anchor="w")
    video_select_frame = tk.Frame(video_frame)
    video_select_frame.pack(fill="x", pady=5)
    self.video_label = tk.Label(video_select_frame, text="No video selected", bg="white", relief="sunken")
    self.video_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
    tk.Button(video_select_frame, text="Browse", command=self.select_video).pack(side="right")

    # Output folder selection
    output_frame = tk.Frame(self.root)
    output_frame.pack(pady=10, padx=20, fill="x")
    tk.Label(output_frame, text="Output Folder:").pack(anchor="w")
    output_select_frame = tk.Frame(output_frame)
    output_select_frame.pack(fill="x", pady=5)
    self.output_label = tk.Label(output_select_frame, text="shorts/", bg="white", relief="sunken")
    self.output_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
    tk.Button(output_select_frame, text="Browse", command=self.select_output_folder).pack(side="right")

    # Settings frame
    settings_frame = tk.LabelFrame(self.root, text="Settings", padx=10, pady=10)
    settings_frame.pack(pady=10, padx=20, fill="x")

    # Max clips with on/off toggle
    clips_frame = tk.Frame(settings_frame)
    clips_frame.pack(fill="x", pady=5)
    clips_left_frame = tk.Frame(clips_frame)
    clips_left_frame.pack(side="left")
    self.use_max_clips = tk.BooleanVar(value=True)
    clips_checkbox = tk.Checkbutton(clips_left_frame, variable=self.use_max_clips, text="Max Clips to Generate:")
    clips_checkbox.pack(side="left")
    self.clips_var = tk.IntVar(value=3)
    self.clips_spinbox = tk.Spinbox(clips_frame, from_=1, to=10, width=5, textvariable=self.clips_var)
    self.clips_spinbox.pack(side="right")

    def toggle_clips_limit():
        """Enable the clip-count spinbox only while the limit is switched on."""
        if self.use_max_clips.get():
            self.clips_spinbox.config(state="normal")
        else:
            self.clips_spinbox.config(state="disabled")

    # A single write trace covers both checkbox clicks and programmatic
    # set() calls, so no separate checkbox command is needed.
    self.use_max_clips.trace_add("write", lambda *_args: toggle_clips_limit())

    # Add tooltip for max clips setting
    clips_tooltip_text = """Max Clips Control:
• Checked: Limit the number of clips generated
• Unchecked: Generate all detected moments
• 1-3 clips: Quick highlights for social media
• 4-6 clips: Good variety pack
• 7-10 clips: Comprehensive highlight reel
Tip: Start with 3 clips, then increase if you want more content"""
    ToolTip(self.clips_spinbox, clips_tooltip_text, side='right')
    ToolTip(clips_checkbox, clips_tooltip_text, side='right')

    # Detection Mode Selection
    detection_frame = tk.Frame(settings_frame)
    detection_frame.pack(fill="x", pady=5)
    tk.Label(detection_frame, text="Detection Mode:", font=("Arial", 9, "bold")).pack(side="left")
    self.detection_mode_var = tk.StringVar(value="loud")
    self.detection_display_var = tk.StringVar(value="🔊 Loud Moments")

    # Single source of truth: display text -> internal mode value.
    # The dropdown entries are taken from its keys, in this order.
    self.mode_mapping = {
        "🔊 Loud Moments": "loud",
        "🎬 Scene Changes": "scene",
        "🏃 Motion Intensity": "motion",
        "😄 Emotional Speech": "speech",
        "🎵 Audio Peaks": "peaks",
        "🎯 Smart Combined": "combined"
    }
    detection_dropdown = ttk.Combobox(detection_frame, textvariable=self.detection_display_var,
                                      values=list(self.mode_mapping),
                                      state="readonly", width=22)
    detection_dropdown.pack(side="right")

    # Simple, clear descriptions for mode tooltips
    mode_descriptions = {
        "🔊 Loud Moments": """Analyzes audio volume levels to find the loudest parts of your video.
• Best for: Gaming reactions, music highlights, shouting moments
• Finds: High-volume audio segments above the threshold
• Ideal when: Your video has clear volume differences
• Tip: Adjust threshold if too many/few moments found""",
        "🎬 Scene Changes": """Detects dramatic visual transitions and cuts in your video.
• Best for: Movie trailers, montages, location changes
• Finds: Major visual shifts between frames
• Ideal when: Video has multiple scenes or camera angles
• Tip: Great for content with quick cuts or transitions""",
        "🏃 Motion Intensity": """Analyzes movement and action within video frames.
• Best for: Sports highlights, dance videos, action scenes
• Finds: High-movement moments with lots of visual activity
• Ideal when: Video contains physical action or movement
• Tip: Perfect for extracting the most dynamic moments""",
        "😄 Emotional Speech": """Uses AI to detect excited, emotional, or emphatic speech patterns.
• Best for: Reactions, reviews, commentary, tutorials
• Finds: Words like 'wow', 'amazing', exclamations, excited tone
• Ideal when: Video has spoken content with emotional moments
• Tip: Captures the most engaging verbal reactions""",
        "🎵 Audio Peaks": """Detects sudden audio spikes like bass drops, impacts, or sound effects.
• Best for: Music videos, sound effect moments, beat drops
• Finds: Sharp increases in audio frequency or volume
• Ideal when: Video has musical elements or sound effects
• Tip: Great for rhythm-based or audio-driven content""",
        "🎯 Smart Combined": """Intelligently combines all detection methods for optimal results.
• Best for: Any video type, general content, unsure what to use
• Finds: Moments scoring high across multiple analysis methods
• Ideal when: You want the most 'interesting' overall moments
• Tip: Recommended starting point for most videos"""
    }

    # Create tooltip for the dropdown (updates when selection changes)
    current_tooltip_text = mode_descriptions["🔊 Loud Moments"]  # Default
    dropdown_tooltip = ToolTip(detection_dropdown, current_tooltip_text)

    def on_detection_change(event):
        """Sync the internal mode, tooltip and threshold row with the dropdown."""
        selection = detection_dropdown.get()
        self.detection_mode_var.set(self.mode_mapping.get(selection, "loud"))
        # Update tooltip text for the selected mode
        dropdown_tooltip.text = mode_descriptions.get(selection, "Select a detection mode")
        # The threshold only applies to loud-moment detection
        if selection == "🔊 Loud Moments":
            # Re-insert above the duration row; a plain pack() would append
            # the row at the bottom and change the layout after toggling.
            threshold_frame.pack(fill="x", pady=5, before=duration_frame)
        else:
            threshold_frame.pack_forget()

    detection_dropdown.bind("<<ComboboxSelected>>", on_detection_change)

    # Audio threshold (only shown for loud moments)
    threshold_frame = tk.Frame(settings_frame)
    threshold_frame.pack(fill="x", pady=5)
    threshold_label = tk.Label(threshold_frame, text="Audio Threshold (dB):")
    threshold_label.pack(side="left")
    self.threshold_var = tk.IntVar(value=-30)
    threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
    threshold_spinbox.pack(side="right")

    # Add tooltip for threshold setting
    threshold_tooltip_text = """Audio Threshold Control:
• Higher values (closer to 0): Only very loud moments
• Lower values (closer to -50): More moments detected
• Default -30 dB: Good balance for most videos
• Adjust based on your video's audio levels
Example: Gaming videos might need -20 dB, quiet vlogs might need -40 dB"""
    ToolTip(threshold_spinbox, threshold_tooltip_text, side='right')

    # Clip duration (3 to 120 seconds)
    duration_frame = tk.Frame(settings_frame)
    duration_frame.pack(fill="x", pady=5)
    duration_label = tk.Label(duration_frame, text="Clip Duration (seconds):")
    duration_label.pack(side="left")
    self.duration_var = tk.IntVar(value=5)
    duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
    duration_spinbox.pack(side="right")

    # Add tooltip for duration setting
    duration_tooltip_text = """Clip Duration Setting:
• 3-10 seconds: Perfect for TikTok/Instagram Reels
• 10-30 seconds: Good for YouTube Shorts
• 30-60 seconds: Longer form highlights
• 60+ seconds: Extended content clips
Shorter clips = more viral potential
Longer clips = more context and story"""
    ToolTip(duration_spinbox, duration_tooltip_text, side='right')

    # Preview button
    self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                 command=self.preview_clips, bg="#2196F3", fg="white",
                                 font=("Arial", 10, "bold"), pady=5)
    self.preview_btn.pack(pady=5)

    # Add tooltip for preview button
    preview_tooltip_text = """Preview Clips Feature:
• Analyzes your video using the selected detection mode
• Shows all detected moments with timestamps
• Lets you select specific clips to generate
• No video files created - just analysis
• Great for testing settings before full generation
Tip: Always preview first to see what the AI finds!"""
    ToolTip(self.preview_btn, preview_tooltip_text, side='right')

    # Generate button
    self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                  command=self.start_generation, bg="#4CAF50", fg="white",
                                  font=("Arial", 12, "bold"), pady=10)
    self.generate_btn.pack(pady=10)

    # Add tooltip for generate button
    generate_tooltip_text = """Generate Shorts Feature:
• Creates actual video files from detected moments
• Adds AI-generated subtitles to each clip
• Formats videos for vertical social media (1080x1920)
• Saves clips to your selected output folder
• Takes longer but creates ready-to-post content
Tip: Use Preview first to fine-tune your settings!"""
    ToolTip(self.generate_btn, generate_tooltip_text, side='right')

    # Edit Shorts button
    self.edit_btn = tk.Button(self.root, text="✏️ Edit Generated Shorts",
                              command=self.open_shorts_editor, bg="#FF9800", fg="white",
                              font=("Arial", 11, "bold"), pady=8)
    self.edit_btn.pack(pady=5)

    # Add tooltip for edit button
    edit_tooltip_text = """Professional Shorts Editor:
• Select any generated short for editing
• Trim, speed up/slow down videos
• Add fade in/out effects
• Adjust volume levels
• Resize and crop videos
• Add custom text overlays
• Real-time preview and professional tools
Transform your shorts into perfect content!"""
    ToolTip(self.edit_btn, edit_tooltip_text, side='right')

    # Thumbnail Editor button
    self.thumbnail_btn = tk.Button(self.root, text="📸 Create Thumbnails",
                                   command=self.open_thumbnail_editor, bg="#9C27B0", fg="white",
                                   font=("Arial", 11, "bold"), pady=8)
    self.thumbnail_btn.pack(pady=5)

    # Add tooltip for thumbnail button
    thumbnail_tooltip_text = """Professional Thumbnail Editor:
• Select any video to create custom thumbnails
• Choose the perfect frame with timeline slider
• Add text overlays with custom fonts and colors
• Add stickers and emojis for eye-catching designs
• Drag and drop positioning
• High-quality export (JPEG/PNG)
• Perfect for YouTube, TikTok, Instagram
Create thumbnails that get clicks!"""
    ToolTip(self.thumbnail_btn, thumbnail_tooltip_text, side='right')

    # Progress frame
    progress_frame = tk.Frame(self.root)
    progress_frame.pack(pady=5, padx=20, fill="x")
    self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
    self.progress_label.pack()
    self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
    self.progress_bar.pack(pady=3)
def select_video(self):
    """Ask for a video file and remember it as the source video."""
    video_types = [("Video files", "*.mp4 *.mov *.avi *.mkv *.wmv")]
    chosen = filedialog.askopenfilename(title="Select Video File", filetypes=video_types)
    if not chosen:
        # Dialog was cancelled: keep the previous selection
        return
    self.video_path = chosen
    self.video_label.config(text=os.path.basename(chosen))
def select_output_folder(self):
    """Ask for a directory and use it as the folder for generated clips."""
    chosen_dir = filedialog.askdirectory(title="Select Output Folder")
    if not chosen_dir:
        # Dialog was cancelled: keep the current folder
        return
    self.output_folder = chosen_dir
    self.output_label.config(text=chosen_dir)
def preview_clips(self):
    """Analyse the selected video and list the detected moments.

    The detector is chosen from ``self.detection_mode_var``; loud-moment
    detection is used for "loud" and for any unrecognised mode. The result
    is shown in a window with one checkbox per clip, limited to the
    configured maximum when that limit is enabled. Errors are reported in a
    message box and the preview button is always restored afterwards.
    """
    if not self.video_path:
        messagebox.showwarning("Warning", "Please select a video file first!")
        return

    try:
        # Validation comes first so an unusable video is rejected early
        validate_video(self.video_path, min_duration=self.duration_var.get() * 2)

        self.preview_btn.config(state="disabled", text="Analyzing...")
        self.root.update()

        # Detectors that only need the chunk duration, with their display names
        chunk_only_detectors = {
            "scene": (detect_scene_changes, "scene changes"),
            "motion": (detect_motion_intensity, "motion moments"),
            "speech": (detect_speech_emotion, "emotional speech"),
            "peaks": (detect_audio_peaks, "audio peaks"),
            "combined": (detect_combined_intensity, "interesting moments"),
        }
        mode = self.detection_mode_var.get()
        if mode in chunk_only_detectors:
            detector, mode_name = chunk_only_detectors[mode]
            moments = detector(self.video_path, chunk_duration=self.duration_var.get())
        else:
            # "loud" and every unknown mode use loud-moment detection
            moments = detect_loud_moments(
                self.video_path,
                chunk_duration=self.duration_var.get(),
                threshold_db=self.threshold_var.get(),
            )
            mode_name = "loud moments"

        if not moments:
            messagebox.showinfo("Preview", f"No {mode_name} found.\nTry a different detection mode or adjust settings.")
            return

        # Result window
        window = tk.Toplevel(self.root)
        window.title("Preview and Select Clips")
        window.geometry("500x400")
        tk.Label(window, text=f"Found {len(moments)} {mode_name}:", font=("Arial", 12, "bold")).pack(pady=10)

        # Scrollable area holding one checkbox per clip
        canvas = tk.Canvas(window)
        scrollbar = tk.Scrollbar(window, orient="vertical", command=canvas.yview)
        clip_list = tk.Frame(canvas)
        clip_list.bind(
            "<Configure>",
            lambda _event: canvas.configure(scrollregion=canvas.bbox("all")),
        )
        canvas.create_window((0, 0), window=clip_list, anchor="nw")
        canvas.configure(yscrollcommand=scrollbar.set)

        # Checkbox state and clip data are kept on self for generate_selected_clips
        self.clip_vars = []
        if self.use_max_clips.get():
            shown_clips = moments[:self.clips_var.get()]
        else:
            shown_clips = moments
        self.preview_clips_data = shown_clips

        for number, (clip_start, clip_end) in enumerate(self.preview_clips_data, 1):
            length = clip_end - clip_start
            caption = f"Clip {number}: {clip_start//60:02.0f}:{clip_start%60:05.2f} - {clip_end//60:02.0f}:{clip_end%60:05.2f} ({length:.1f}s)"
            chosen = tk.BooleanVar(value=True)  # every clip starts selected
            self.clip_vars.append(chosen)
            row = tk.Frame(clip_list)
            row.pack(fill="x", padx=10, pady=2)
            tick = tk.Checkbutton(row, variable=chosen, text=caption,
                                  font=("Courier", 10), anchor="w")
            tick.pack(fill="x")

        canvas.pack(side="left", fill="both", expand=True, padx=10, pady=5)
        scrollbar.pack(side="right", fill="y")

        # Buttons
        buttons = tk.Frame(window)
        buttons.pack(fill="x", padx=10, pady=10)
        bulk_row = tk.Frame(buttons)
        bulk_row.pack(fill="x", pady=5)

        def set_all(checked):
            for flag in self.clip_vars:
                flag.set(checked)

        tk.Button(bulk_row, text="Select All",
                  command=lambda: set_all(True)).pack(side="left", padx=5)
        tk.Button(bulk_row, text="Deselect All",
                  command=lambda: set_all(False)).pack(side="left", padx=5)

        # Fixed width keeps the full caption visible
        generate_selected_btn = tk.Button(buttons, text="🎬 Generate Selected Clips",
                                          command=lambda: self.generate_selected_clips(window),
                                          bg="#4CAF50", fg="white", font=("Arial", 11, "bold"),
                                          pady=8, width=25)
        generate_selected_btn.pack(fill="x", pady=5)

        tk.Button(buttons, text="Close", command=window.destroy).pack(pady=5)
    except Exception as e:
        messagebox.showerror("Preview Error", f"Error analyzing video: {str(e)}")
    finally:
        self.preview_btn.config(state="normal", text="🔍 Preview Clips")
def generate_selected_clips(self, preview_window):
    """Generate the clips that are ticked in the preview window.

    Args:
        preview_window: The preview window; it is closed only after the
            user has confirmed, so declining returns to the preview with
            the selection intact.

    Warns when nothing is selected. After confirmation the selected
    ``(start, end)`` pairs are stored in ``self.selected_clips_data`` and
    ``selected_generation_worker`` is started in a daemon thread.
    """
    try:
        # (clip_number, (start, end)) for every ticked checkbox
        selected_clips = [
            (number, clip_data)
            for number, (clip_var, clip_data)
            in enumerate(zip(self.clip_vars, self.preview_clips_data), 1)
            if clip_var.get()
        ]
        if not selected_clips:
            messagebox.showwarning("Warning", "Please select at least one clip to generate!")
            return

        clip_count = len(selected_clips)
        clip_numbers = [str(num) for num, _ in selected_clips]
        confirm_msg = f"Generate {clip_count} selected clips (#{', #'.join(clip_numbers)})?"
        # Ask before closing the preview so cancelling does not lose it
        if not messagebox.askyesno("Confirm Generation", confirm_msg, parent=preview_window):
            return

        preview_window.destroy()

        # Start generation in a background thread
        self.selected_clips_data = [clip_data for _, clip_data in selected_clips]
        self.generate_btn.config(state="disabled", text="Generating Selected...")
        thread = threading.Thread(target=self.selected_generation_worker, daemon=True)
        thread.start()
    except Exception as e:
        messagebox.showerror("Generation Error", f"Error starting generation: {str(e)}")
def selected_generation_worker(self):
    """Create a short for every entry in ``self.selected_clips_data``.

    Steps: ensure the output folder exists, require at least 1 GB of free
    space there, validate the source video, then transcribe and render each
    clip to ``short_<n>.mp4``. Failures are reported in a message box; the
    generate button and progress display are always reset at the end.
    """
    # NOTE(review): this method is started in a background thread by
    # generate_selected_clips yet calls Tk widgets and message boxes
    # directly; confirm this is safe for the Tk build in use.
    import shutil

    try:
        # The folder must exist before its free space can be queried
        os.makedirs(self.output_folder, exist_ok=True)
        free_space_gb = shutil.disk_usage(self.output_folder).free / (1024**3)
        if free_space_gb < 1:
            raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

        # Validate video first
        try:
            video_duration = validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
            self.update_progress(f"✅ Video validated ({video_duration:.1f}s)", 5)
        except Exception:
            self.update_progress("❌ Video validation failed", 0)
            raise

        selected_count = len(self.selected_clips_data)
        self.update_progress(f"📊 Generating {selected_count} selected clips", 10)

        # Spread the range 10..100 evenly over the clips so the bar never
        # runs past 100, whatever the number of clips.
        per_clip = 90 / max(selected_count, 1)
        for i, (start, end) in enumerate(self.selected_clips_data):
            self.update_progress(f"🗣️ Transcribing clip {i+1}/{selected_count}", 10 + i * per_clip)
            subtitles = transcribe_and_extract_subtitles(self.video_path, start, end)
            out_path = os.path.join(self.output_folder, f"short_{i+1}.mp4")
            self.update_progress(f"🎬 Creating video {i+1}/{selected_count}", 10 + (i + 0.5) * per_clip)
            create_short_clip(self.video_path, start, end, subtitles, out_path)

        self.update_progress("✅ Selected clips generated successfully!", 100)
        messagebox.showinfo("Success", f"Successfully generated {selected_count} selected clips in '{self.output_folder}' folder!")
    except FileNotFoundError as e:
        messagebox.showerror("File Error", str(e))
    except ValueError as e:
        messagebox.showerror("Video Error", str(e))
    except RuntimeError as e:
        messagebox.showerror("System Error", str(e))
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
    finally:
        self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
        self.progress_bar["value"] = 0
        self.progress_label.config(text="Ready to generate shorts")
def update_progress(self, message, percent):
    """Show ``message`` in the progress label and set the bar to ``percent``.

    Ends by calling ``self.root.update()`` so the change is processed
    immediately.
    """
    # NOTE(review): the workers call this from a background thread - confirm
    # that driving root.update() from there is safe on the target Tk build.
    label, bar = self.progress_label, self.progress_bar
    label.config(text=message)
    bar["value"] = percent
    self.root.update()
def generation_worker(self):
    """Run ``generate_shorts`` with the settings currently held by the GUI.

    Used as the thread target started by ``start_generation``. Errors are
    reported through message boxes; the generate button and progress
    widgets are always reset when the worker finishes.
    """
    # NOTE(review): this runs on a background thread yet touches Tk widgets
    # and message boxes directly - confirm that is safe on the target Tk build.
    try:
        import shutil

        # Create the output folder first: shutil.disk_usage() raises
        # FileNotFoundError for a path that does not exist yet.
        os.makedirs(self.output_folder, exist_ok=True)

        # Check available disk space
        free_space_gb = shutil.disk_usage(self.output_folder).free / (1024**3)
        if free_space_gb < 1:
            raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

        generate_shorts(
            self.video_path,
            # Fall back to 10 clips when the "max clips" option is switched off.
            max_clips=self.clips_var.get() if self.use_max_clips.get() else 10,
            output_folder=self.output_folder,
            progress_callback=self.update_progress,
            threshold_db=self.threshold_var.get(),
            clip_duration=self.duration_var.get(),
            detection_mode=self.detection_mode_var.get(),
        )
        messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
    except FileNotFoundError as e:
        messagebox.showerror("File Error", str(e))
    except ValueError as e:
        messagebox.showerror("Video Error", str(e))
    except RuntimeError as e:
        messagebox.showerror("System Error", str(e))
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
    finally:
        # Always restore the idle UI state, whether or not generation succeeded.
        self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
        self.progress_bar["value"] = 0
        self.progress_label.config(text="Ready to generate shorts")
def start_generation(self):
    """Start shorts generation on a background daemon thread.

    Shows a warning and does nothing else when no video has been selected;
    otherwise disables the generate button and runs ``generation_worker``
    in a new thread.
    """
    if self.video_path:
        self.generate_btn.config(state="disabled", text="Generating...")
        worker = threading.Thread(target=self.generation_worker, daemon=True)
        worker.start()
        return
    messagebox.showwarning("Warning", "Please select a video file first!")
def open_shorts_editor(self):
    """Open the shorts editor for the current output folder."""
    # The editor is given the main window and the folder holding the shorts.
    ShortsEditorGUI(self.root, self.output_folder).open_editor()
def open_thumbnail_editor(self):
    """Open the thumbnail editor for the source video or a generated short.

    Candidate videos are the currently selected source video (if any) plus
    every ``*.mp4`` file in the output folder. A single candidate is opened
    directly; with several, a chooser window lets the user pick one or
    cancel. Failures are reported through message boxes.
    """
    try:
        # Collect the videos the user can create a thumbnail for.
        video_files = []
        if self.video_path:
            video_files.append(("Original Video", self.video_path))
        if os.path.exists(self.output_folder):
            # glob returns files in arbitrary order; sort so the chooser
            # lists the shorts in a stable order.
            shorts = sorted(glob.glob(os.path.join(self.output_folder, "*.mp4")))
            video_files.extend((os.path.basename(short), short) for short in shorts)

        if not video_files:
            messagebox.showinfo("No Videos Found",
                                "Please select a video or generate some shorts first!")
            return

        if len(video_files) == 1:
            # Only one candidate: no need to ask.
            selected_video = video_files[0][1]
        else:
            # Let user choose which video to edit
            choice_window = tk.Toplevel(self.root)
            choice_window.title("Select Video for Thumbnail")
            choice_window.geometry("400x300")
            choice_window.transient(self.root)
            choice_window.grab_set()
            tk.Label(choice_window, text="📸 Select Video for Thumbnail Creation",
                     font=("Arial", 12, "bold")).pack(pady=10)

            selected_video = None

            def on_video_select(video_path):
                """Remember the chosen path and close the chooser."""
                nonlocal selected_video
                selected_video = video_path
                choice_window.destroy()

            for display_name, video_path in video_files:
                # Bind the path as a default argument so each button keeps
                # its own video instead of the loop's last value.
                btn = tk.Button(choice_window, text=f"📹 {display_name}",
                                command=lambda vp=video_path: on_video_select(vp),
                                font=("Arial", 10), pady=5, width=40)
                btn.pack(pady=2, padx=20, fill="x")
            tk.Button(choice_window, text="Cancel",
                      command=choice_window.destroy).pack(pady=10)

            # Block here until the chooser window is destroyed.
            choice_window.wait_window()
            if not selected_video:
                # Cancelled or closed without picking a video.
                return

        # Imported here so a missing thumbnail_editor module is reported to
        # the user by the ImportError handler below.
        from thumbnail_editor import open_thumbnail_editor
        open_thumbnail_editor(selected_video)
    except ImportError as e:
        messagebox.showerror("Thumbnail Editor Error",
                             f"Could not load thumbnail editor:\n{str(e)}\n\nMake sure thumbnail_editor.py is in the same folder.")
    except Exception as e:
        messagebox.showerror("Error", f"Failed to open thumbnail editor:\n{str(e)}")
def run_gui():
    """Create the Tk root window, build the shorts generator GUI, and run the event loop."""
    window = tk.Tk()
    gui = ShortsGeneratorGUI(window)  # bound to a name, as in the original
    window.mainloop()
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] != "--gui":
        # Command line mode: the first argument is passed to generate_shorts
        # as the video to process.
        try:
            generate_shorts(sys.argv[1])
        except Exception as e:
            # Report on stderr and exit non-zero so shells and scripts can
            # detect the failure instead of seeing a successful exit.
            print(f"❌ Error: {str(e)}", file=sys.stderr)
            sys.exit(1)
        else:
            print("✅ Shorts generation completed successfully!")
    else:
        # Run GUI mode (default, also selected explicitly with --gui)
        run_gui()