feat: Implement subtitle positioning tool with GUI

- Added app.py for subtitle positioning tool using Tkinter and MoviePy.
- Integrated font selection and adjustable subtitle positioning.
- Implemented loading and saving of presets in JSON format.
- Added functionality to preview subtitles on video clips.
- Enhanced subtitle rendering with highlight effects.
- Created app2.py for advanced subtitle handling with SRT file support.
- Implemented SRT parsing and subtitle navigation in app2.py.
- Added system font detection for better font compatibility.
- Updated shorts_generator2.py to include GUI for shorts generation.
- Enhanced error handling and progress tracking in shorts generation.
- Created subtitle_generator.py for automatic subtitle generation from video.
- Added progress bar and user feedback in subtitle generation GUI.
- Updated subtitle_gui_presets.json and subtitles.srt for testing.
This commit is contained in:
klop51 2025-08-06 20:41:10 +02:00
parent 79e85cb8ff
commit c82130ec6e
6 changed files with 1080 additions and 8 deletions

175
app.py Normal file
View File

@ -0,0 +1,175 @@
import tkinter as tk
from tkinter import filedialog
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
import threading
import json
# Global settings with defaults.
# This dict is the shared state of the tool: the slider/dropdown callbacks
# write into it, render_preview() reads it, and save_presets()/load_presets()
# serialize it to and from JSON.
settings = {
# y coordinate handed to with_position() for the base subtitle
"subtitle_y_px": 1550,
# added to subtitle_y_px to get the highlighted word's y coordinate
"highlight_offset": -8,
# font sizes passed to TextClip for the base subtitle / highlighted word
"font_size_subtitle": 65,
"font_size_highlight": 68,
# added to the computed x offset of the highlighted word
"highlight_x_offset": 0,
# set by open_video(); render_preview() refuses to run while it is None
"video_path": None,
"selected_font": "Arial" # Default font
}
# Font names offered in the "Font Family" dropdown.
# NOTE(review): whether each name actually resolves depends on the text
# renderer and the fonts installed on the machine — confirm per platform.
COMPATIBLE_FONTS = [
"Arial",
"Times-Roman",
"Helvetica",
"Courier",
"Comic-Sans-MS",
"Impact",
"Verdana",
"Tahoma",
"Georgia",
"Trebuchet-MS"
]
# JSON file (relative path) read and written by load_presets()/save_presets()
preset_file = "subtitle_gui_presets.json"
def save_presets():
    """Serialize the whole ``settings`` dict to ``preset_file`` as JSON."""
    with open(preset_file, mode="w") as out:
        json.dump(settings, fp=out)
    print("💾 Presets saved!")
def load_presets():
    """Merge the presets stored in ``preset_file`` into ``settings``.

    On success the GUI widgets are refreshed through ``sync_gui()``.
    A missing, unparsable or non-object preset file is reported on stdout
    and leaves ``settings`` untouched instead of raising inside the Tk
    button callback.
    """
    try:
        with open(preset_file, "r") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        print("⚠️ No presets found.")
        return
    except json.JSONDecodeError as exc:
        print(f"⚠️ Preset file is not valid JSON: {exc}")
        return
    # dict.update() needs a mapping; a JSON list or scalar is not a preset.
    if not isinstance(loaded, dict):
        print("⚠️ Preset file does not contain a settings object.")
        return
    # ``settings`` is mutated in place, so no ``global`` statement is needed.
    settings.update(loaded)
    print("✅ Presets loaded!")
    sync_gui()
def sync_gui():
    """Copy the current ``settings`` values into the matching GUI controls."""
    bindings = (
        (sub_y_slider, "subtitle_y_px"),
        (highlight_slider, "highlight_offset"),
        (highlight_x_slider, "highlight_x_offset"),
        (sub_font_slider, "font_size_subtitle"),
        (highlight_font_slider, "font_size_highlight"),
        (font_var, "selected_font"),
    )
    for control, key in bindings:
        control.set(settings[key])
def render_preview():
    """Play a preview of the first 3 seconds with a test subtitle overlaid.

    The video is scaled to a height of 1920, cropped to a centred 1080-wide
    column, overlaid with a fixed test subtitle plus a highlighted word, and
    shown scaled down to 900 px height. Does nothing (apart from a console
    message) when no video has been selected.
    """
    if not settings["video_path"]:
        print("⚠️ No video selected.")
        return
    subtitle_text = "THIS IS A TEST SUBTITLE"
    highlight_word = "SUBTITLE"
    source = clip = final = preview_clip = None
    try:
        source = VideoFileClip(settings["video_path"])
        clip = source.subclipped(0, 3)  # Use first 3 seconds
        # The crop is applied to the *resized* clip, so its centre must be
        # taken from the resized width, not from the source width.
        resized = clip.resized(height=1920)
        vertical_clip = resized.cropped(width=1080, x_center=resized.w / 2)
        base_subtitle = TextClip(
            text=subtitle_text,
            font_size=settings["font_size_subtitle"],
            font=settings["selected_font"],
            color='white',
            stroke_color='black',
            stroke_width=5
        ).with_duration(3).with_position(('center', settings["subtitle_y_px"]))
        # Compute highlight word position from a fixed per-character width
        full_text = subtitle_text.upper()
        words = full_text.split()
        highlight_index = words.index(highlight_word.upper())
        chars_before = sum(len(w) + 1 for w in words[:highlight_index])
        char_width = 35
        total_width = len(full_text) * char_width
        x_offset = (chars_before * char_width) - (total_width // 2) + settings["highlight_x_offset"]
        highlighted_word = TextClip(
            text=highlight_word,
            font_size=settings["font_size_highlight"],
            font=settings["selected_font"],
            color='#FFD700',
            stroke_color='#FF6B35',
            stroke_width=5
        ).with_duration(1.5).with_start(0.75).with_position(
            (540 + x_offset, settings["subtitle_y_px"] + settings["highlight_offset"])
        )
        final = CompositeVideoClip([vertical_clip, base_subtitle, highlighted_word], size=(1080, 1920))
        # Scale down the preview to fit 1080p monitor (max height ~900px to leave room for taskbar)
        preview_scale = 900 / 1920  # Scale factor to fit height
        preview_width = int(1080 * preview_scale)
        preview_height = int(1920 * preview_scale)
        preview_clip = final.resized((preview_width, preview_height))
        preview_clip.preview(fps=24, audio=False)
    finally:
        # Release the clips even when rendering fails part-way through.
        for opened in (preview_clip, final, clip, source):
            if opened is not None:
                opened.close()
def update_setting(var_name, value):
    """Convert ``value`` to ``int`` and store it in ``settings[var_name]``."""
    as_int = int(value)
    settings[var_name] = as_int
def update_font(font_name):
    """Record the font chosen in the dropdown as ``settings["selected_font"]``."""
    settings.update(selected_font=font_name)
def open_video():
    """Ask for an MP4 file and remember its path in ``settings["video_path"]``."""
    chosen = filedialog.askopenfilename(filetypes=[("MP4 files", "*.mp4")])
    if not chosen:
        # Dialog cancelled: keep whatever video was selected before.
        return
    settings["video_path"] = chosen
    print(f"📂 Loaded video: {chosen}")
def start_preview_thread():
    """Run ``render_preview`` on a new thread so the Tk callback returns at once."""
    worker = threading.Thread(target=render_preview)
    worker.start()
# GUI Setup
# Widgets are packed top to bottom in the order they appear below, so the
# statement order is the window layout. The slider and font variables are
# module-level names because sync_gui() refers to them.
root = tk.Tk()
root.title("Subtitle Positioning Tool")
root.geometry("400x600")
# Video picker
load_btn = tk.Button(root, text="🎥 Load Video", command=open_video)
load_btn.pack(pady=10)
# Font dropdown: update_font() receives the chosen entry
tk.Label(root, text="Font Family").pack()
font_var = tk.StringVar(value=settings["selected_font"])
font_dropdown = tk.OptionMenu(root, font_var, *COMPATIBLE_FONTS, command=update_font)
font_dropdown.pack(pady=5)
# Each slider forwards its value to update_setting() under a fixed key and
# is initialised from the current settings value.
tk.Label(root, text="Subtitle Y Position").pack()
sub_y_slider = tk.Scale(root, from_=1000, to=1800, orient="horizontal",
command=lambda v: update_setting("subtitle_y_px", v))
sub_y_slider.set(settings["subtitle_y_px"])
sub_y_slider.pack()
tk.Label(root, text="Highlight Y Offset").pack()
highlight_slider = tk.Scale(root, from_=-100, to=100, orient="horizontal",
command=lambda v: update_setting("highlight_offset", v))
highlight_slider.set(settings["highlight_offset"])
highlight_slider.pack()
tk.Label(root, text="Highlight X Offset").pack()
highlight_x_slider = tk.Scale(root, from_=-300, to=300, orient="horizontal",
command=lambda v: update_setting("highlight_x_offset", v))
highlight_x_slider.set(settings["highlight_x_offset"])
highlight_x_slider.pack()
tk.Label(root, text="Subtitle Font Size").pack()
sub_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
command=lambda v: update_setting("font_size_subtitle", v))
sub_font_slider.set(settings["font_size_subtitle"])
sub_font_slider.pack()
tk.Label(root, text="Highlight Font Size").pack()
highlight_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
command=lambda v: update_setting("font_size_highlight", v))
highlight_font_slider.set(settings["font_size_highlight"])
highlight_font_slider.pack()
# Action buttons: preview runs on a background thread, presets go to JSON
preview_btn = tk.Button(root, text="▶️ Preview Clip", command=start_preview_thread)
preview_btn.pack(pady=10)
save_btn = tk.Button(root, text="💾 Save Preset", command=save_presets)
save_btn.pack(pady=5)
load_preset_btn = tk.Button(root, text="📂 Load Preset", command=load_presets)
load_preset_btn.pack(pady=5)
# Blocks until the window is closed
root.mainloop()

322
app2.py Normal file
View File

@ -0,0 +1,322 @@
import tkinter as tk
from tkinter import filedialog
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
import threading
import json
import re
import os
import platform
def get_system_fonts():
    """Return the font choices offered in the GUI.

    The first entry is always ``'System Default'``. On Windows it is
    followed by the well-known font file names (``arial.ttf``, ...) for
    which a matching ``.ttf``/``.otf`` file exists in the system font
    folders, in sorted order; if none is found the complete list of common
    Windows fonts is used as a fallback. On other platforms only
    ``'System Default'`` is returned.
    """
    # (substring searched in the file name,
    #  skip files whose name contains "bold",
    #  font file name offered to the user)
    known_fonts = (
        ("arial", True, "arial.ttf"),
        ("times", True, "times.ttf"),
        ("courier", True, "cour.ttf"),
        ("comic", False, "comic.ttf"),
        ("impact", False, "impact.ttf"),
        ("verdana", False, "verdana.ttf"),
        ("tahoma", False, "tahoma.ttf"),
    )
    fonts = []
    if platform.system() == "Windows":
        found = set()
        # Common Windows font paths
        for font_dir in ("C:/Windows/Fonts/", "C:/Windows/System32/Fonts/"):
            try:
                entries = os.listdir(font_dir)
            except OSError:
                # Folder missing or unreadable: nothing to collect here.
                continue
            for entry in entries:
                stem, ext = os.path.splitext(entry)
                # Font files are often upper-case on Windows (ARIAL.TTF).
                if ext.lower() not in (".ttf", ".otf"):
                    continue
                stem = stem.lower()
                for keyword, skip_bold, font_file in known_fonts:
                    if keyword in stem and not (skip_bold and "bold" in stem):
                        found.add(font_file)
                        break
        # Sorted so the dropdown order is stable between runs.
        fonts = sorted(found) if found else [name for _, _, name in known_fonts]
    # Add option to use no font (system default)
    fonts.insert(0, 'System Default')
    return fonts
# Entries of the "Font" dropdown, computed once at import time.
AVAILABLE_FONTS = get_system_fonts()
# Global settings with defaults.
# Shared state of the tool: widget callbacks write into it, render_preview()
# reads it, and save_presets()/load_presets() serialize the whole dict
# (including "subtitles" and "current_index") to and from JSON.
settings = {
# y coordinate handed to with_position() for the base subtitle
"subtitle_y_px": 1550,
# added to subtitle_y_px to get the highlighted word's y coordinate
"highlight_offset": -8,
# font sizes passed to TextClip for the base subtitle / highlighted word
"font_size_subtitle": 65,
"font_size_highlight": 68,
# added to the computed x offset of the highlighted word
"highlight_x_offset": 0,
# set by open_video(); None until a video has been chosen
"video_path": None,
# "System Default" makes render_preview() omit the font argument
"font": "System Default",
# list of {"start", "end", "text"} dicts produced by parse_srt()
"subtitles": [],
# index into "subtitles" of the cue that render_preview() shows
"current_index": 0
}
# Compatible fonts that work across different systems
# NOTE(review): not referenced anywhere else in the visible code of this
# file (the dropdown uses AVAILABLE_FONTS) — presumably a leftover; confirm.
COMPATIBLE_FONTS = [
"Arial",
"Times-Roman",
"Helvetica",
"Courier",
"Comic-Sans-MS",
"Impact",
"Verdana",
"Tahoma",
"Georgia",
"Trebuchet-MS"
]
# JSON file (relative path) read and written by load_presets()/save_presets()
preset_file = "subtitle_gui_presets.json"
# === SRT PARSER ===
# "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing line of an SRT cue
_SRT_TIMING = re.compile(
    r"(\d{2}:\d{2}:\d{2},\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2},\d{3})"
)


def parse_srt(file_path):
    """Parse an SRT file into a list of subtitle dicts.

    Args:
        file_path: path of a UTF-8 encoded ``.srt`` file.

    Returns:
        A list of ``{"start": seconds, "end": seconds, "text": str}`` dicts
        in file order. Multi-line cue text is joined with single spaces and
        carries no leading or trailing whitespace.

    The file is scanned line by line instead of with one multi-line regex,
    so a text line that begins with a digit ("3 apples") no longer cuts the
    cue short.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        contents = f.read()
    subtitles = []
    text_lines = None  # text lines of the cue being collected
    for raw_line in contents.splitlines():
        line = raw_line.strip()
        timing = _SRT_TIMING.match(line)
        if timing:
            # The digits-only line right before a timing line is the index
            # of the new cue, not text of the previous one.
            if text_lines and text_lines[-1].isdigit():
                text_lines.pop()
            text_lines = []
            subtitles.append({
                "start": srt_time_to_seconds(timing.group(1)),
                "end": srt_time_to_seconds(timing.group(2)),
                "text": text_lines,
            })
        elif text_lines is not None and line:
            text_lines.append(line)
    for subtitle in subtitles:
        subtitle["text"] = ' '.join(subtitle["text"])
    return subtitles


def srt_time_to_seconds(time_str):
    """Convert an ``HH:MM:SS,mmm`` SRT timestamp to seconds as a float."""
    h, m, s_ms = time_str.split(':')
    s, ms = s_ms.split(',')
    return int(h)*3600 + int(m)*60 + int(s) + int(ms)/1000
# === PRESETS ===
def save_presets():
    """Serialize the whole ``settings`` dict to ``preset_file`` as JSON."""
    with open(preset_file, mode="w") as out:
        json.dump(settings, fp=out)
    print("📂 Presets saved!")
def load_presets():
    """Merge the presets stored in ``preset_file`` into ``settings``.

    On success the GUI widgets are refreshed through ``sync_gui()``.
    A missing, unparsable or non-object preset file is reported on stdout
    and leaves ``settings`` untouched instead of raising inside the Tk
    button callback.
    """
    try:
        with open(preset_file, "r") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        print("⚠️ No presets found.")
        return
    except json.JSONDecodeError as exc:
        print(f"⚠️ Preset file is not valid JSON: {exc}")
        return
    # dict.update() needs a mapping; a JSON list or scalar is not a preset.
    if not isinstance(loaded, dict):
        print("⚠️ Preset file does not contain a settings object.")
        return
    # ``settings`` is mutated in place, so no ``global`` statement is needed.
    settings.update(loaded)
    print("✅ Presets loaded!")
    sync_gui()
# === SYNC ===
def sync_gui():
    """Copy the current ``settings`` values into the matching GUI controls."""
    bindings = (
        (sub_y_slider, "subtitle_y_px"),
        (highlight_slider, "highlight_offset"),
        (highlight_x_slider, "highlight_x_offset"),
        (sub_font_slider, "font_size_subtitle"),
        (highlight_font_slider, "font_size_highlight"),
        (font_dropdown_var, "font"),
    )
    for control, key in bindings:
        control.set(settings[key])
def _make_text_clip(text, font_size, color, stroke_color):
    """Build a stroked TextClip, retrying without a font if the chosen one fails."""
    options = {
        "text": text,
        "font_size": font_size,
        "color": color,
        "stroke_color": stroke_color,
        "stroke_width": 5,
    }
    font = settings["font"]
    if font != "System Default":
        try:
            return TextClip(font=font, **options)
        except Exception as exc:
            # Fallback to system default if font fails
            print(f"⚠️ Font {font} failed, using system default ({exc})")
    return TextClip(**options)


def render_preview():
    """Play the video segment of the current subtitle with the overlay applied.

    The segment between the cue's start and end time is scaled to a height
    of 1920, cropped to a centred 1080-wide column, overlaid with the cue
    text and with its last word highlighted, and shown scaled down to
    900 px height. Does nothing (apart from a console message) when the
    video or the subtitles are missing or the cue has no text.
    """
    if not settings["video_path"] or not settings["subtitles"]:
        print("⚠️ Video or subtitles not loaded.")
        return
    subtitles = settings["subtitles"]
    # A preset loaded from disk can carry an index that does not fit the
    # subtitles currently loaded; clamp instead of raising IndexError.
    index = min(max(settings["current_index"], 0), len(subtitles) - 1)
    sub = subtitles[index]
    subtitle_text = sub["text"]
    words = subtitle_text.upper().split()
    if not words:
        print("⚠️ Subtitle has no text to preview.")
        return
    start_time = sub["start"]
    end_time = sub["end"]
    duration = end_time - start_time
    highlight_word = subtitle_text.split()[-1]  # Highlight last word for now
    # The highlighted word IS the last word; looking it up with index()
    # would pick an earlier occurrence when the word is repeated.
    highlight_index = len(words) - 1
    chars_before = sum(len(w) + 1 for w in words[:highlight_index])
    char_width = 35
    total_width = len(subtitle_text.upper()) * char_width
    x_offset = (chars_before * char_width) - (total_width // 2) + settings["highlight_x_offset"]
    highlight_y = settings["subtitle_y_px"] + settings["highlight_offset"]

    source = clip = final = preview_clip = None
    try:
        source = VideoFileClip(settings["video_path"])
        clip = source.subclipped(start_time, end_time)
        # The crop is applied to the *resized* clip, so its centre must be
        # taken from the resized width, not from the source width.
        resized = clip.resized(height=1920)
        vertical_clip = resized.cropped(width=1080, x_center=resized.w / 2)
        base_subtitle = _make_text_clip(
            subtitle_text, settings["font_size_subtitle"], 'white', 'black'
        ).with_duration(duration).with_position(('center', settings["subtitle_y_px"]))
        # The highlight is shown during the middle half of the cue.
        highlighted_word = _make_text_clip(
            highlight_word, settings["font_size_highlight"], '#FFD700', '#FF6B35'
        ).with_duration(duration / 2).with_start(duration / 4).with_position(
            (540 + x_offset, highlight_y)
        )
        final = CompositeVideoClip([vertical_clip, base_subtitle, highlighted_word], size=(1080, 1920))
        # Scale down the preview to fit 1080p monitor (max height ~900px to leave room for taskbar)
        preview_scale = 900 / 1920  # Scale factor to fit height
        preview_width = int(1080 * preview_scale)
        preview_height = int(1920 * preview_scale)
        preview_clip = final.resized((preview_width, preview_height))
        preview_clip.preview(fps=24, audio=False)
    finally:
        # Release the clips even when rendering fails part-way through.
        for opened in (preview_clip, final, clip, source):
            if opened is not None:
                opened.close()
def update_setting(var_name, value):
    """Store ``value`` under ``var_name``, as ``int`` for size/offset/position keys."""
    is_numeric = (
        var_name.startswith("font_size")
        or "offset" in var_name
        or "y_px" in var_name
    )
    if is_numeric:
        settings[var_name] = int(value)
    else:
        settings[var_name] = value
def update_font(value):
    """Record the font chosen in the dropdown as ``settings["font"]``."""
    settings.update(font=value)
def open_video():
    """Ask for an MP4 file and remember its path in ``settings["video_path"]``."""
    chosen = filedialog.askopenfilename(filetypes=[("MP4 files", "*.mp4")])
    if not chosen:
        # Dialog cancelled: keep whatever video was selected before.
        return
    settings["video_path"] = chosen
    print(f"📂 Loaded video: {chosen}")
def load_srt():
    """Ask for an SRT file, parse it and restart navigation at the first cue."""
    chosen = filedialog.askopenfilename(filetypes=[("SRT Subtitle", "*.srt")])
    if not chosen:
        return
    parsed = parse_srt(chosen)
    settings["subtitles"] = parsed
    settings["current_index"] = 0
    print(f"📝 Loaded {len(parsed)} subtitles from {chosen}")
def next_sub():
    """Advance to the next subtitle (if any) and start a preview."""
    last_index = len(settings["subtitles"]) - 1
    if settings["current_index"] < last_index:
        settings["current_index"] = settings["current_index"] + 1
    # A preview is started even when the index did not move.
    start_preview_thread()
def prev_sub():
    """Step back to the previous subtitle (if any) and start a preview."""
    position = settings["current_index"]
    if position > 0:
        settings["current_index"] = position - 1
    # A preview is started even when the index did not move.
    start_preview_thread()
def start_preview_thread():
    """Run ``render_preview`` on a new thread so the Tk callback returns at once."""
    worker = threading.Thread(target=render_preview)
    worker.start()
# === GUI ===
# Widgets are packed top to bottom in the order they appear below, so the
# statement order is the window layout. The slider and font variables are
# module-level names because sync_gui() refers to them.
root = tk.Tk()
root.title("Subtitle Positioning Tool")
root.geometry("420x700")
# Input pickers: video file and SRT subtitle file
load_btn = tk.Button(root, text="🎥 Load Video", command=open_video)
load_btn.pack(pady=10)
load_srt_btn = tk.Button(root, text="📑 Load SRT Subtitles", command=load_srt)
load_srt_btn.pack(pady=5)
# Each slider forwards its value to update_setting() under a fixed key and
# is initialised from the current settings value.
tk.Label(root, text="Subtitle Y Position").pack()
sub_y_slider = tk.Scale(root, from_=1000, to=1800, orient="horizontal",
command=lambda v: update_setting("subtitle_y_px", v))
sub_y_slider.set(settings["subtitle_y_px"])
sub_y_slider.pack()
tk.Label(root, text="Highlight Y Offset").pack()
highlight_slider = tk.Scale(root, from_=-100, to=100, orient="horizontal",
command=lambda v: update_setting("highlight_offset", v))
highlight_slider.set(settings["highlight_offset"])
highlight_slider.pack()
tk.Label(root, text="Highlight X Offset").pack()
highlight_x_slider = tk.Scale(root, from_=-300, to=300, orient="horizontal",
command=lambda v: update_setting("highlight_x_offset", v))
highlight_x_slider.set(settings["highlight_x_offset"])
highlight_x_slider.pack()
tk.Label(root, text="Subtitle Font Size").pack()
sub_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
command=lambda v: update_setting("font_size_subtitle", v))
sub_font_slider.set(settings["font_size_subtitle"])
sub_font_slider.pack()
tk.Label(root, text="Highlight Font Size").pack()
highlight_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
command=lambda v: update_setting("font_size_highlight", v))
highlight_font_slider.set(settings["font_size_highlight"])
highlight_font_slider.pack()
# Font dropdown fed from AVAILABLE_FONTS; update_font() receives the choice
tk.Label(root, text="Font").pack()
font_dropdown_var = tk.StringVar(value=settings["font"])
font_dropdown = tk.OptionMenu(root, font_dropdown_var, *AVAILABLE_FONTS, command=update_font)
font_dropdown.pack(pady=5)
# Preview of the current cue runs on a background thread
preview_btn = tk.Button(root, text="▶️ Preview Clip", command=start_preview_thread)
preview_btn.pack(pady=10)
# Prev/Next move settings["current_index"] and start a new preview
nav_frame = tk.Frame(root)
tk.Button(nav_frame, text="⏮️ Prev", command=prev_sub).pack(side="left", padx=5)
tk.Button(nav_frame, text="⏭️ Next", command=next_sub).pack(side="right", padx=5)
nav_frame.pack(pady=5)
# Preset persistence (JSON file named by preset_file)
save_btn = tk.Button(root, text="📂 Save Preset", command=save_presets)
save_btn.pack(pady=5)
load_preset_btn = tk.Button(root, text="📂 Load Preset", command=load_presets)
load_preset_btn.pack(pady=5)
# Blocks until the window is closed
root.mainloop()

View File

@ -2,6 +2,9 @@ import os
import numpy as np import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel from faster_whisper import WhisperModel
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import threading
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10): def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
print("🔍 Analyzing audio...") print("🔍 Analyzing audio...")
@ -82,7 +85,6 @@ def create_short_clip(video_path, start, end, subtitles, output_path):
# Base subtitle # Base subtitle
base_subtitle = TextClip( base_subtitle = TextClip(
text=chunk_text.upper(), text=chunk_text.upper(),
font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
font_size=65, font_size=65,
color='white', color='white',
stroke_color='black', stroke_color='black',
@ -102,7 +104,6 @@ def create_short_clip(video_path, start, end, subtitles, output_path):
highlighted_word = TextClip( highlighted_word = TextClip(
text=word.upper(), text=word.upper(),
font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
font_size=68, font_size=68,
color='#FFD700', color='#FFD700',
stroke_color='#FF6B35', stroke_color='#FF6B35',
@ -111,7 +112,7 @@ def create_short_clip(video_path, start, end, subtitles, output_path):
word_width, _ = highlighted_word.size word_width, _ = highlighted_word.size
word_x = current_x + (word_width / 2) word_x = current_x + (word_width / 2)
highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x -8, subtitle_y_px)) highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x -125, subtitle_y_px))
clips.append(highlighted_word) clips.append(highlighted_word)
current_x += word_width + 20 # Add spacing between words current_x += word_width + 20 # Add spacing between words
@ -129,19 +130,434 @@ def create_short_clip(video_path, start, end, subtitles, output_path):
clip.audio.reader.close() clip.audio.reader.close()
final.close() final.close()
def validate_video(video_path, min_duration=30):
    """Validate video file and return duration.

    Args:
        video_path: path of the video to open.
        min_duration: minimum accepted length in seconds.

    Returns:
        The duration of the video in seconds.

    Raises:
        FileNotFoundError: the error raised while opening mentions
            "No such file".
        ValueError: the file cannot be opened or read, its duration is
            unknown, or it is shorter than ``min_duration``.
    """
    try:
        clip = VideoFileClip(video_path)
    except Exception as e:
        message = str(e)
        if "No such file" in message:
            raise FileNotFoundError(f"Video file not found: {video_path}") from e
        if "could not open" in message.lower():
            raise ValueError(f"Invalid or corrupted video file: {video_path}") from e
        raise ValueError(f"Error reading video: {message}") from e
    try:
        duration = clip.duration
    finally:
        # Always release the reader, even if reading the duration fails.
        clip.close()
    if duration is None:
        raise ValueError(f"Error reading video: unknown duration for {video_path}")
    # Raised outside the try block above so the message reaches the caller
    # unchanged instead of being re-wrapped as "Error reading video: ...".
    if duration < min_duration:
        raise ValueError(f"Video is too short ({duration:.1f}s). Minimum {min_duration}s required.")
    return duration
def generate_shorts(video_path, max_clips=3, output_folder="shorts", progress_callback=None, threshold_db=-30, clip_duration=5):
    """Detect loud moments in a video and render each one as a subtitled short.

    Args:
        video_path: path of the source video.
        max_clips: upper bound on the number of shorts; used as the slice
            end over the detected moments, so ``None`` keeps all of them.
        output_folder: folder for ``short_<n>.mp4`` (created if missing).
        progress_callback: optional ``callback(message, percent)`` invoked
            at each stage; ``percent`` stays within 0-100.
        threshold_db: loudness threshold handed to ``detect_loud_moments``.
        clip_duration: chunk length in seconds handed to
            ``detect_loud_moments``; the video must be at least twice as long.

    Raises:
        ValueError: no loud moment was found (validation errors from
            ``validate_video`` propagate unchanged).
    """
    def report(message, percent):
        if progress_callback:
            progress_callback(message, percent)

    os.makedirs(output_folder, exist_ok=True)

    # Validate video first
    try:
        video_duration = validate_video(video_path, min_duration=clip_duration * 2)
    except Exception:
        report("❌ Video validation failed", 0)
        raise  # bare raise keeps the original traceback
    report(f"✅ Video validated ({video_duration:.1f}s)", 5)

    report("🔍 Analyzing audio for loud moments...", 10)
    best_moments = detect_loud_moments(video_path, chunk_duration=clip_duration, threshold_db=threshold_db)
    selected = best_moments[:max_clips]
    if not selected:
        raise ValueError(f"No loud moments found with threshold {threshold_db} dB. Try lowering the threshold or use a different video.")
    total = len(selected)
    report(f"📊 Found {total} clips to generate", 20)

    # Spread the remaining 20-100 % evenly over the clips so the reported
    # percentage never exceeds 100, whatever the number of clips.
    share = 80 / total
    for i, (start, end) in enumerate(selected):
        report(f"🗣️ Transcribing clip {i+1}/{total}", int(20 + i * share))
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        report(f"🎬 Creating video {i+1}/{total}", int(20 + (i + 0.5) * share))
        create_short_clip(video_path, start, end, subtitles, out_path)

    report("✅ All shorts generated successfully!", 100)
# GUI Components
class ShortsGeneratorGUI:
def __init__(self, root):
    """Set up the window, the default state and all widgets.

    Args:
        root: the ``tk.Tk`` window that hosts the application.
    """
    self.root = root
    root.title("AI Shorts Generator")
    root.geometry("500x400")
    # Nothing selected yet; shorts go to ./shorts unless changed.
    self.video_path, self.output_folder = None, "shorts"
    # Defaults mirrored by the spinboxes built in create_widgets().
    self.max_clips, self.threshold_db, self.clip_duration = 3, -30, 5
    self.create_widgets()
def create_widgets(self):
    """Build every widget of the main window, top to bottom.

    Creates the attributes used by the other methods: ``video_label``,
    ``output_label``, ``use_max_clips``, ``clips_var``, ``clips_spinbox``,
    ``threshold_var``, ``duration_var``, ``preview_btn``, ``generate_btn``,
    ``progress_label`` and ``progress_bar``.
    """
    # Title
    title_label = tk.Label(self.root, text="🎬 AI Shorts Generator", font=("Arial", 16, "bold"))
    title_label.pack(pady=10)

    # Video selection
    video_frame = tk.Frame(self.root)
    video_frame.pack(pady=10, padx=20, fill="x")
    tk.Label(video_frame, text="Select Video File:").pack(anchor="w")
    video_select_frame = tk.Frame(video_frame)
    video_select_frame.pack(fill="x", pady=5)
    self.video_label = tk.Label(video_select_frame, text="No video selected", bg="white", relief="sunken")
    self.video_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
    tk.Button(video_select_frame, text="Browse", command=self.select_video).pack(side="right")

    # Output folder selection
    output_frame = tk.Frame(self.root)
    output_frame.pack(pady=10, padx=20, fill="x")
    tk.Label(output_frame, text="Output Folder:").pack(anchor="w")
    output_select_frame = tk.Frame(output_frame)
    output_select_frame.pack(fill="x", pady=5)
    self.output_label = tk.Label(output_select_frame, text="shorts/", bg="white", relief="sunken")
    self.output_label.pack(side="left", fill="x", expand=True, padx=(0, 5))
    tk.Button(output_select_frame, text="Browse", command=self.select_output_folder).pack(side="right")

    # Settings frame
    settings_frame = tk.LabelFrame(self.root, text="Settings", padx=10, pady=10)
    settings_frame.pack(pady=10, padx=20, fill="x")

    # Max clips with on/off toggle
    clips_frame = tk.Frame(settings_frame)
    clips_frame.pack(fill="x", pady=5)
    clips_left_frame = tk.Frame(clips_frame)
    clips_left_frame.pack(side="left")
    self.use_max_clips = tk.BooleanVar(value=True)
    clips_checkbox = tk.Checkbutton(clips_left_frame, variable=self.use_max_clips, text="Max Clips to Generate:")
    clips_checkbox.pack(side="left")
    self.clips_var = tk.IntVar(value=3)
    self.clips_spinbox = tk.Spinbox(clips_frame, from_=1, to=10, width=5, textvariable=self.clips_var)
    self.clips_spinbox.pack(side="right")

    # Enable the spinbox only while the limit is switched on
    def toggle_clips_limit(*_args):
        state = "normal" if self.use_max_clips.get() else "disabled"
        self.clips_spinbox.config(state=state)

    # A write trace fires for every change of the variable, including the
    # checkbox toggling it, so no separate checkbox command is needed.
    # trace_add() replaces the deprecated Variable.trace("w", ...).
    self.use_max_clips.trace_add("write", toggle_clips_limit)

    # Audio threshold
    threshold_frame = tk.Frame(settings_frame)
    threshold_frame.pack(fill="x", pady=5)
    tk.Label(threshold_frame, text="Audio Threshold (dB):").pack(side="left")
    self.threshold_var = tk.IntVar(value=-30)
    threshold_spinbox = tk.Spinbox(threshold_frame, from_=-50, to=0, width=5, textvariable=self.threshold_var)
    threshold_spinbox.pack(side="right")

    # Clip duration (increased to 120 seconds max)
    duration_frame = tk.Frame(settings_frame)
    duration_frame.pack(fill="x", pady=5)
    tk.Label(duration_frame, text="Clip Duration (seconds):").pack(side="left")
    self.duration_var = tk.IntVar(value=5)
    duration_spinbox = tk.Spinbox(duration_frame, from_=3, to=120, width=5, textvariable=self.duration_var)
    duration_spinbox.pack(side="right")

    # Preview button
    self.preview_btn = tk.Button(self.root, text="🔍 Preview Clips",
                                 command=self.preview_clips, bg="#2196F3", fg="white",
                                 font=("Arial", 10, "bold"), pady=5)
    self.preview_btn.pack(pady=10)

    # Generate button
    self.generate_btn = tk.Button(self.root, text="🎬 Generate Shorts",
                                  command=self.start_generation, bg="#4CAF50", fg="white",
                                  font=("Arial", 12, "bold"), pady=10)
    self.generate_btn.pack(pady=20)

    # Progress frame
    progress_frame = tk.Frame(self.root)
    progress_frame.pack(pady=10, padx=20, fill="x")
    self.progress_label = tk.Label(progress_frame, text="Ready to generate shorts")
    self.progress_label.pack()
    self.progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
    self.progress_bar.pack(pady=5)
def select_video(self):
    """Ask for a video file and show its base name in the video label."""
    chosen = filedialog.askopenfilename(
        title="Select Video File",
        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.wmv")]
    )
    if not chosen:
        # Dialog cancelled: keep the previous selection.
        return
    self.video_path = chosen
    self.video_label.config(text=os.path.basename(chosen))
def select_output_folder(self):
    """Ask for the output directory and show it in the output label."""
    chosen = filedialog.askdirectory(title="Select Output Folder")
    if not chosen:
        # Dialog cancelled: keep the previous folder.
        return
    self.output_folder = chosen
    self.output_label.config(text=chosen)
def preview_clips(self):
# Analyze the selected video for loud moments and open a window in which
# the user ticks the clips to generate. Runs in the Tk callback itself,
# so the window does not process events while the analysis is running
# (apart from the explicit self.root.update() below).
if not self.video_path:
messagebox.showwarning("Warning", "Please select a video file first!")
return
try:
# Validate video first
validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
# Analyze for loud moments
self.preview_btn.config(state="disabled", text="Analyzing...")
# Force a redraw so the "Analyzing..." caption is visible before the
# blocking analysis call starts
self.root.update()
loud_moments = detect_loud_moments(
self.video_path,
chunk_duration=self.duration_var.get(),
threshold_db=self.threshold_var.get()
)
if not loud_moments:
messagebox.showinfo("Preview", f"No loud moments found with threshold {self.threshold_var.get()} dB.\nTry lowering the threshold.")
# The finally clause below still re-enables the preview button
return
# Show preview window
preview_window = tk.Toplevel(self.root)
preview_window.title("Preview and Select Clips")
preview_window.geometry("500x400")
tk.Label(preview_window, text=f"Found {len(loud_moments)} loud moments:", font=("Arial", 12, "bold")).pack(pady=10)
# Create scrollable frame for checkboxes
canvas = tk.Canvas(preview_window)
scrollbar = tk.Scrollbar(preview_window, orient="vertical", command=canvas.yview)
scrollable_frame = tk.Frame(canvas)
# Keep the canvas scroll region in sync with the size of the inner frame
scrollable_frame.bind(
"<Configure>",
lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
)
canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
canvas.configure(yscrollcommand=scrollbar.set)
# Store checkbox variables and clip data
# (read back later by generate_selected_clips())
self.clip_vars = []
# Use all clips if max clips is disabled, otherwise limit by setting
clips_to_show = loud_moments if not self.use_max_clips.get() else loud_moments[:self.clips_var.get()]
self.preview_clips_data = clips_to_show
# Add selectable clips with checkboxes
for i, (start, end) in enumerate(self.preview_clips_data, 1):
duration = end - start
# start/end are formatted as minutes:seconds
time_str = f"Clip {i}: {start//60:02.0f}:{start%60:05.2f} - {end//60:02.0f}:{end%60:05.2f} ({duration:.1f}s)"
clip_var = tk.BooleanVar(value=True) # Default selected
self.clip_vars.append(clip_var)
clip_frame = tk.Frame(scrollable_frame)
clip_frame.pack(fill="x", padx=10, pady=2)
checkbox = tk.Checkbutton(clip_frame, variable=clip_var, text=time_str,
font=("Courier", 10), anchor="w")
checkbox.pack(fill="x")
canvas.pack(side="left", fill="both", expand=True, padx=10, pady=5)
scrollbar.pack(side="right", fill="y")
# Button frame
button_frame = tk.Frame(preview_window)
button_frame.pack(fill="x", padx=10, pady=10)
# Select/Deselect all buttons
control_frame = tk.Frame(button_frame)
control_frame.pack(fill="x", pady=5)
tk.Button(control_frame, text="Select All",
command=lambda: [var.set(True) for var in self.clip_vars]).pack(side="left", padx=5)
tk.Button(control_frame, text="Deselect All",
command=lambda: [var.set(False) for var in self.clip_vars]).pack(side="left", padx=5)
# Generate selected clips button (fixed size for full text visibility)
generate_selected_btn = tk.Button(button_frame, text="🎬 Generate Selected Clips",
command=lambda: self.generate_selected_clips(preview_window),
bg="#4CAF50", fg="white", font=("Arial", 11, "bold"),
pady=8, width=25)
generate_selected_btn.pack(fill="x", pady=5)
# Close button
tk.Button(button_frame, text="Close", command=preview_window.destroy).pack(pady=5)
except Exception as e:
# Covers validation errors as well as failures in the analysis or UI code
messagebox.showerror("Preview Error", f"Error analyzing video: {str(e)}")
finally:
# Runs on every exit path, including the early return above
self.preview_btn.config(state="normal", text="🔍 Preview Clips")
def generate_selected_clips(self, preview_window):
    """Generate only the clips ticked in the preview window.

    Closes ``preview_window``, asks for confirmation and then starts
    ``selected_generation_worker`` on a daemon thread. Any error is shown
    in a message box.
    """
    try:
        # (clip_number, (start, end)) for every ticked checkbox
        chosen = [
            (number, span)
            for number, (flag, span) in enumerate(
                zip(self.clip_vars, self.preview_clips_data), start=1
            )
            if flag.get()
        ]
        if not chosen:
            messagebox.showwarning("Warning", "Please select at least one clip to generate!")
            return

        # The preview window is closed before the confirmation is shown.
        preview_window.destroy()

        clip_count = len(chosen)
        clip_numbers = [str(number) for number, _ in chosen]
        confirm_msg = f"Generate {clip_count} selected clips (#{', #'.join(clip_numbers)})?"
        confirmed = messagebox.askyesno("Confirm Generation", confirm_msg)
        if not confirmed:
            return

        # Hand the (start, end) pairs to the background worker.
        self.selected_clips_data = [span for _, span in chosen]
        self.generate_btn.config(state="disabled", text="Generating Selected...")
        worker = threading.Thread(target=self.selected_generation_worker, daemon=True)
        worker.start()
    except Exception as e:
        messagebox.showerror("Generation Error", f"Error starting generation: {str(e)}")
def selected_generation_worker(self):
    """Generate the clips stored in ``self.selected_clips_data``.

    Intended as a thread target: checks free disk space, validates the
    video, then transcribes and renders each selected ``(start, end)``
    span to ``short_<n>.mp4`` in the output folder. Errors are shown in
    message boxes; the generate button and the progress display are reset
    in every case.
    """
    try:
        # The folder must exist before disk_usage() can inspect it; on a
        # first run the default "shorts" folder has not been created yet.
        os.makedirs(self.output_folder, exist_ok=True)

        # Check available disk space
        import shutil
        free_space_gb = shutil.disk_usage(self.output_folder).free / (1024**3)
        if free_space_gb < 1:
            raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

        # Validate video first
        try:
            video_duration = validate_video(self.video_path, min_duration=self.duration_var.get() * 2)
        except Exception:
            self.update_progress("❌ Video validation failed", 0)
            raise  # bare raise keeps the original traceback
        self.update_progress(f"✅ Video validated ({video_duration:.1f}s)", 5)

        selected_count = len(self.selected_clips_data)
        self.update_progress(f"📊 Generating {selected_count} selected clips", 10)

        # Spread the remaining 10-100 % evenly over the clips so the bar
        # never runs past 100, whatever the number of clips.
        share = 90 / selected_count if selected_count else 0
        for i, (start, end) in enumerate(self.selected_clips_data):
            self.update_progress(f"🗣️ Transcribing clip {i+1}/{selected_count}", int(10 + i * share))
            subtitles = transcribe_and_extract_subtitles(self.video_path, start, end)
            out_path = os.path.join(self.output_folder, f"short_{i+1}.mp4")
            self.update_progress(f"🎬 Creating video {i+1}/{selected_count}", int(10 + (i + 0.5) * share))
            create_short_clip(self.video_path, start, end, subtitles, out_path)

        self.update_progress("✅ Selected clips generated successfully!", 100)
        messagebox.showinfo("Success", f"Successfully generated {selected_count} selected clips in '{self.output_folder}' folder!")
    except FileNotFoundError as e:
        messagebox.showerror("File Error", str(e))
    except ValueError as e:
        messagebox.showerror("Video Error", str(e))
    except RuntimeError as e:
        messagebox.showerror("System Error", str(e))
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
    finally:
        self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
        self.progress_bar["value"] = 0
        self.progress_label.config(text="Ready to generate shorts")
def update_progress(self, message, percent):
    """Show ``message`` and ``percent`` in the progress widgets and redraw."""
    self.progress_label.configure(text=message)
    self.progress_bar.configure(value=percent)
    # Process pending events so the new state becomes visible immediately.
    self.root.update()
def generation_worker(self):
    """Run ``generate_shorts`` with the current GUI settings.

    Intended as a thread target: checks free disk space, then generates
    the shorts while reporting progress through ``update_progress``.
    Errors are shown in message boxes; the generate button and the
    progress display are reset in every case.
    """
    try:
        # The folder must exist before disk_usage() can inspect it; on a
        # first run the default "shorts" folder has not been created yet.
        os.makedirs(self.output_folder, exist_ok=True)

        # Check available disk space
        import shutil
        free_space_gb = shutil.disk_usage(self.output_folder).free / (1024**3)
        if free_space_gb < 1:
            raise RuntimeError(f"Insufficient disk space. Only {free_space_gb:.1f} GB available. Need at least 1 GB.")

        # generate_shorts() slices the detected moments with [:max_clips],
        # so None keeps all of them without running the audio analysis a
        # second time just to count the moments.
        max_clips = self.clips_var.get() if self.use_max_clips.get() else None
        generate_shorts(
            self.video_path,
            max_clips=max_clips,
            output_folder=self.output_folder,
            progress_callback=self.update_progress,
            threshold_db=self.threshold_var.get(),
            clip_duration=self.duration_var.get()
        )
        messagebox.showinfo("Success", f"Successfully generated shorts in '{self.output_folder}' folder!")
    except FileNotFoundError as e:
        messagebox.showerror("File Error", str(e))
    except ValueError as e:
        messagebox.showerror("Video Error", str(e))
    except RuntimeError as e:
        messagebox.showerror("System Error", str(e))
    except Exception as e:
        messagebox.showerror("Error", f"An unexpected error occurred: {str(e)}")
    finally:
        self.generate_btn.config(state="normal", text="🎬 Generate Shorts")
        self.progress_bar["value"] = 0
        self.progress_label.config(text="Ready to generate shorts")
def start_generation(self):
    """Start shorts generation on a background thread.

    Shows a warning and does nothing else when no video has been selected.
    Otherwise disables the generate button and runs ``generation_worker``
    in a daemon thread.
    """
    if self.video_path:
        self.generate_btn.config(state="disabled", text="Generating...")
        worker = threading.Thread(target=self.generation_worker, daemon=True)
        worker.start()
    else:
        messagebox.showwarning("Warning", "Please select a video file first!")
def run_gui():
    """Create the Tk root window, attach the shorts generator GUI, and run the event loop."""
    window = tk.Tk()
    gui = ShortsGeneratorGUI(window)
    window.mainloop()
# Script entry point:
#   --gui         -> launch the GUI
#   no arguments  -> print usage, then launch the GUI
#   <video path>  -> generate shorts from that video on the command line
if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "--gui":
        # Run GUI mode
        run_gui()
    elif len(sys.argv) < 2:
        print("Usage: python shorts_generator2.py your_video.mp4")
        print(" or: python shorts_generator2.py --gui")
        run_gui()  # Default to GUI if no args
    else:
        # Run command line mode
        try:
            generate_shorts(sys.argv[1])
            print("✅ Shorts generation completed successfully!")
        except Exception as e:
            print(f"❌ Error: {str(e)}")
            # Report the failure to the calling shell as well.
            sys.exit(1)

155
subtitle_generator.py Normal file
View File

@ -0,0 +1,155 @@
import os
import math
import tempfile
import moviepy as mp
import speech_recognition as sr
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest whole millisecond before it is split
    into fields. Truncating the fractional part instead loses a millisecond
    to floating-point noise (1.001 is stored as 1.000999...), and a fraction
    that rounds up to 1000 ms must carry into the seconds field.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero.

    Returns:
        The timestamp string, e.g. ``"00:01:02,500"``.
    """
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
def wrap_text(text, max_len=40):
    """Wrap text to about ``max_len`` characters per line without cutting words.

    Words are split on whitespace and packed greedily. A single word longer
    than ``max_len`` is kept whole on its own line. The result never starts
    with an empty line: a blank line inside an SRT cue would end the cue
    early.

    Args:
        text: The text to wrap.
        max_len: Maximum line length, counting the joining spaces.

    Returns:
        The wrapped lines joined with ``"\\n"``; ``""`` for empty input.
    """
    lines = []
    current_line = ""
    for word in text.split():
        if not current_line:
            # The first word of a line is always accepted, without counting
            # a separator space that is not actually there.
            current_line = word
        elif len(current_line) + 1 + len(word) <= max_len:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
def write_srt(subtitles, output_path):
    """Write subtitle entries to ``output_path`` as a UTF-8 encoded SRT file.

    Args:
        subtitles: Iterable of dicts with ``'start'`` and ``'end'`` (passed to
            ``format_time``) and ``'text'`` (passed to ``wrap_text``).
        output_path: Path of the file to create or overwrite.
    """
    with open(output_path, 'w', encoding='utf-8') as srt_file:
        cue_number = 0
        for entry in subtitles:
            cue_number += 1
            # Cue layout: index line, timing line, text, blank separator.
            srt_file.write(f"{cue_number}\n")
            timing_line = f"{format_time(entry['start'])} --> {format_time(entry['end'])}\n"
            srt_file.write(timing_line)
            text_block = f"{wrap_text(entry['text'])}\n\n"
            srt_file.write(text_block)
def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, chunk_duration=10):
    """Transcribe a video's audio in fixed-size chunks and write an SRT file.

    The audio track is exported to a temporary WAV file, read back
    ``chunk_duration`` seconds at a time, and each chunk is passed to
    ``recognize_google``. Chunks with no recognizable speech are skipped;
    API request errors are printed and the chunk is skipped.

    Args:
        video_path: Path of the video file to transcribe.
        srt_output_path: Destination path of the generated ``.srt`` file.
        progress_callback: Optional callable invoked as
            ``progress_callback(chunks_done, total_chunks)`` after each chunk.
        chunk_duration: Length of each transcription chunk in seconds; must
            be positive.

    Returns:
        True if the SRT file was written, False if any error occurred (the
        error is printed to the console).
    """
    video = None
    temp_audio_path = None
    try:
        if chunk_duration <= 0:
            raise ValueError("chunk_duration must be greater than zero.")

        video = mp.VideoFileClip(video_path)
        audio = video.audio
        if audio is None:
            raise ValueError("The selected video has no audio track to transcribe.")

        # Reserve a temp file name and close the handle before writing, so
        # the audio writer can open the path itself.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name
        audio.write_audiofile(temp_audio_path, logger=None)

        recognizer = sr.Recognizer()
        subtitles = []
        with sr.AudioFile(temp_audio_path) as source:
            audio_duration = source.DURATION
            num_chunks = math.ceil(audio_duration / chunk_duration)
            for i in range(num_chunks):
                start_time = i * chunk_duration
                end_time = min(start_time + chunk_duration, audio_duration)
                # record() reads from the current position of the open
                # source and advances it. Its `offset` is relative to that
                # position, so passing the absolute start time would skip
                # extra audio on every chunk after the first. Reading
                # sequentially with only a duration keeps chunks contiguous.
                audio_data = recognizer.record(source, duration=end_time - start_time)
                try:
                    text = recognizer.recognize_google(audio_data)
                    subtitles.append({
                        "start": start_time,
                        "end": end_time,
                        "text": text
                    })
                except sr.UnknownValueError:
                    # Nothing intelligible in this chunk; emit no cue.
                    pass
                except sr.RequestError as e:
                    print(f"API error: {e}")
                # Update progress bar
                if progress_callback:
                    progress_callback(i + 1, num_chunks)

        write_srt(subtitles, srt_output_path)
        return True
    except Exception as e:
        print(f"Error: {e}")
        return False
    finally:
        # Best-effort cleanup on success and failure alike. Cleanup errors
        # are printed, not raised, so the True/False contract holds.
        if video is not None:
            try:
                video.close()
            except Exception as e:
                print(f"Warning: could not close video: {e}")
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            try:
                os.remove(temp_audio_path)
            except OSError as e:
                print(f"Warning: could not remove temporary audio file: {e}")
# -------------------- GUI --------------------
def select_file_and_generate():
    """Ask for a video and an output path, then transcribe the video to SRT.

    Button callback. Returns without doing anything if either file dialog is
    cancelled. Progress is shown on the module-level ``progress_bar`` and
    ``progress_label`` widgets, and the outcome is reported in a message box.
    """
    video_path = filedialog.askopenfilename(
        title="Select a video file",
        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv")]
    )
    if not video_path:
        return
    srt_output_path = filedialog.asksaveasfilename(
        title="Save SRT subtitles as...",
        defaultextension=".srt",
        filetypes=[("Subtitle files", "*.srt")]
    )
    if not srt_output_path:
        return

    def update_progress(current, total):
        """Show ``current`` of ``total`` chunks on the progress widgets."""
        percent = (current / total) * 100
        progress_bar["value"] = percent
        progress_label.config(text=f"Progress: {current}/{total} chunks")
        root.update()

    # root.update() processes pending events while transcription runs on
    # this thread, including clicks on the button itself. Disable the button
    # for the duration so the callback cannot be re-entered mid-run.
    select_button.config(state="disabled")
    try:
        progress_bar["value"] = 0
        progress_label.config(text="Starting...")
        root.update()
        success = transcribe_video_to_srt(video_path, srt_output_path, progress_callback=update_progress)
        if success:
            progress_label.config(text="Done")
            messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}")
        else:
            # Do not report "Done" for a run that produced no subtitles.
            progress_label.config(text="Failed")
            messagebox.showerror("Error", "Something went wrong. See console for details.")
    finally:
        select_button.config(state="normal")
# GUI Setup: runs at module level, so the window is built and the event loop
# starts as soon as this file is executed. root, progress_bar and
# progress_label are read by select_file_and_generate().
root = tk.Tk()
root.title("Auto Subtitle Generator (.srt) with Progress")
# Padded container that holds every widget below.
frame = tk.Frame(root, padx=20, pady=20)
frame.pack()
# Instruction text above the button.
label = tk.Label(frame, text="Select a video file to auto-generate subtitles (SRT):")
label.pack(pady=(0, 10))
# Single action button: runs the file dialogs and the transcription.
select_button = tk.Button(frame, text="Select Video and Generate Subtitles", command=select_file_and_generate)
select_button.pack(pady=5)
# Determinate bar: its "value" is set to a 0-100 percentage by the callback.
progress_bar = ttk.Progressbar(frame, length=300, mode="determinate")
progress_bar.pack(pady=(15, 5))
# Status line under the bar.
progress_label = tk.Label(frame, text="Idle")
progress_label.pack()
# Hand control to Tk; mainloop() is the last statement of the script.
root.mainloop()

View File

@ -0,0 +1 @@
{"subtitle_y_px": 1550, "highlight_offset": 0, "font_size_subtitle": 65, "font_size_highlight": 65, "highlight_x_offset": -53, "video_path": "C:/Users/braul/Desktop/shorts_project/shorts/short_1.mp4"}

3
subtitles.srt Normal file
View File

@ -0,0 +1,3 @@
1
00:00:00,000 --> 00:00:02,500
You're running