Refactor subtitle generation and integration with Whisper AI; remove unused files and enhance GUI for better user experience
parent 5ce79f084d
commit 491040b148
app.py (deleted, 175 lines)
@@ -1,175 +0,0 @@
import tkinter as tk
from tkinter import filedialog
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
import threading
import json

# Global settings with defaults
settings = {
    "subtitle_y_px": 1550,
    "highlight_offset": -8,
    "font_size_subtitle": 65,
    "font_size_highlight": 68,
    "highlight_x_offset": 0,
    "video_path": None,
    "selected_font": "Arial"  # Default font
}

# Compatible fonts that work across different systems
COMPATIBLE_FONTS = [
    "Arial",
    "Times-Roman",
    "Helvetica",
    "Courier",
    "Comic-Sans-MS",
    "Impact",
    "Verdana",
    "Tahoma",
    "Georgia",
    "Trebuchet-MS"
]

preset_file = "subtitle_gui_presets.json"

def save_presets():
    with open(preset_file, "w") as f:
        json.dump(settings, f)
    print("💾 Presets saved!")

def load_presets():
    global settings
    try:
        with open(preset_file, "r") as f:
            loaded = json.load(f)
            settings.update(loaded)
        print("✅ Presets loaded!")
        sync_gui()
    except FileNotFoundError:
        print("⚠️ No presets found.")

def sync_gui():
    sub_y_slider.set(settings["subtitle_y_px"])
    highlight_slider.set(settings["highlight_offset"])
    highlight_x_slider.set(settings["highlight_x_offset"])
    sub_font_slider.set(settings["font_size_subtitle"])
    highlight_font_slider.set(settings["font_size_highlight"])
    font_var.set(settings["selected_font"])

def render_preview():
    if not settings["video_path"]:
        print("⚠️ No video selected.")
        return

    clip = VideoFileClip(settings["video_path"]).subclipped(0, 3)  # Use first 3 seconds
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)

    subtitle_text = "THIS IS A TEST SUBTITLE"
    highlight_word = "SUBTITLE"

    base_subtitle = TextClip(
        text=subtitle_text,
        font_size=settings["font_size_subtitle"],
        font=settings["selected_font"],
        color='white',
        stroke_color='black',
        stroke_width=5
    ).with_duration(3).with_position(('center', settings["subtitle_y_px"]))

    # Compute highlight word position
    full_text = subtitle_text.upper()
    words = full_text.split()
    highlight_index = words.index(highlight_word.upper())
    chars_before = sum(len(w) + 1 for w in words[:highlight_index])
    char_width = 35
    total_width = len(full_text) * char_width
    x_offset = (chars_before * char_width) - (total_width // 2) + settings["highlight_x_offset"]

    highlighted_word = TextClip(
        text=highlight_word,
        font_size=settings["font_size_highlight"],
        font=settings["selected_font"],
        color='#FFD700',
        stroke_color='#FF6B35',
        stroke_width=5
    ).with_duration(1.5).with_start(0.75).with_position((540 + x_offset, settings["subtitle_y_px"] + settings["highlight_offset"]))

    final = CompositeVideoClip([vertical_clip, base_subtitle, highlighted_word], size=(1080, 1920))
    # Scale down the preview to fit 1080p monitor (max height ~900px to leave room for taskbar)
    preview_scale = 900 / 1920  # Scale factor to fit height
    preview_width = int(1080 * preview_scale)
    preview_height = int(1920 * preview_scale)
    preview_clip = final.resized((preview_width, preview_height))
    preview_clip.preview(fps=24, audio=False)

    clip.close()
    final.close()
    preview_clip.close()

def update_setting(var_name, value):
    settings[var_name] = int(value)

def update_font(font_name):
    settings["selected_font"] = font_name

def open_video():
    file_path = filedialog.askopenfilename(filetypes=[("MP4 files", "*.mp4")])
    if file_path:
        settings["video_path"] = file_path
        print(f"📂 Loaded video: {file_path}")

def start_preview_thread():
    threading.Thread(target=render_preview).start()

# GUI Setup
root = tk.Tk()
root.title("Subtitle Positioning Tool")
root.geometry("400x600")

load_btn = tk.Button(root, text="🎥 Load Video", command=open_video)
load_btn.pack(pady=10)

tk.Label(root, text="Font Family").pack()
font_var = tk.StringVar(value=settings["selected_font"])
font_dropdown = tk.OptionMenu(root, font_var, *COMPATIBLE_FONTS, command=update_font)
font_dropdown.pack(pady=5)

tk.Label(root, text="Subtitle Y Position").pack()
sub_y_slider = tk.Scale(root, from_=1000, to=1800, orient="horizontal",
                        command=lambda v: update_setting("subtitle_y_px", v))
sub_y_slider.set(settings["subtitle_y_px"])
sub_y_slider.pack()

tk.Label(root, text="Highlight Y Offset").pack()
highlight_slider = tk.Scale(root, from_=-100, to=100, orient="horizontal",
                            command=lambda v: update_setting("highlight_offset", v))
highlight_slider.set(settings["highlight_offset"])
highlight_slider.pack()

tk.Label(root, text="Highlight X Offset").pack()
highlight_x_slider = tk.Scale(root, from_=-300, to=300, orient="horizontal",
                              command=lambda v: update_setting("highlight_x_offset", v))
highlight_x_slider.set(settings["highlight_x_offset"])
highlight_x_slider.pack()

tk.Label(root, text="Subtitle Font Size").pack()
sub_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
                           command=lambda v: update_setting("font_size_subtitle", v))
sub_font_slider.set(settings["font_size_subtitle"])
sub_font_slider.pack()

tk.Label(root, text="Highlight Font Size").pack()
highlight_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
                                 command=lambda v: update_setting("font_size_highlight", v))
highlight_font_slider.set(settings["font_size_highlight"])
highlight_font_slider.pack()

preview_btn = tk.Button(root, text="▶️ Preview Clip", command=start_preview_thread)
preview_btn.pack(pady=10)

save_btn = tk.Button(root, text="💾 Save Preset", command=save_presets)
save_btn.pack(pady=5)

load_preset_btn = tk.Button(root, text="📂 Load Preset", command=load_presets)
load_preset_btn.pack(pady=5)

root.mainloop()
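Note: the highlight placement in render_preview approximates text width with a fixed 35 px per character instead of measuring the rendered text. A standalone worked instance of that math, using the hard-coded test line (a sketch for illustration, not part of the repo):

subtitle_text = "THIS IS A TEST SUBTITLE"
highlight_word = "SUBTITLE"

words = subtitle_text.split()
highlight_index = words.index(highlight_word)                     # 4
chars_before = sum(len(w) + 1 for w in words[:highlight_index])   # "THIS IS A TEST " -> 15
char_width = 35
total_width = len(subtitle_text) * char_width                     # 23 * 35 = 805
x_offset = (chars_before * char_width) - (total_width // 2)       # 525 - 402 = 123
print(x_offset)  # 123: the word lands ~123 px right of the 540 px centerline
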
app2.py (deleted, 322 lines)
@@ -1,322 +0,0 @@
import tkinter as tk
from tkinter import filedialog
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
import threading
import json
import re
import os
import platform

def get_system_fonts():
    """Get list of available system fonts"""
    fonts = []

    if platform.system() == "Windows":
        # Common Windows font paths
        font_paths = [
            "C:/Windows/Fonts/",
            "C:/Windows/System32/Fonts/"
        ]

        common_fonts = []
        for font_path in font_paths:
            if os.path.exists(font_path):
                for file in os.listdir(font_path):
                    if file.endswith(('.ttf', '.otf')):
                        # Extract font name without extension
                        font_name = os.path.splitext(file)[0]
                        # Clean up common variations
                        if 'arial' in font_name.lower() and 'bold' not in font_name.lower():
                            common_fonts.append('arial.ttf')
                        elif 'times' in font_name.lower() and 'bold' not in font_name.lower():
                            common_fonts.append('times.ttf')
                        elif 'courier' in font_name.lower() and 'bold' not in font_name.lower():
                            common_fonts.append('cour.ttf')
                        elif 'comic' in font_name.lower():
                            common_fonts.append('comic.ttf')
                        elif 'impact' in font_name.lower():
                            common_fonts.append('impact.ttf')
                        elif 'verdana' in font_name.lower():
                            common_fonts.append('verdana.ttf')
                        elif 'tahoma' in font_name.lower():
                            common_fonts.append('tahoma.ttf')

        # Add found fonts, fallback to common Windows fonts
        fonts = list(set(common_fonts)) if common_fonts else [
            'arial.ttf', 'times.ttf', 'cour.ttf', 'comic.ttf',
            'impact.ttf', 'verdana.ttf', 'tahoma.ttf'
        ]

    # Add option to use no font (system default)
    fonts.insert(0, 'System Default')
    return fonts

AVAILABLE_FONTS = get_system_fonts()

# Global settings with defaults
settings = {
    "subtitle_y_px": 1550,
    "highlight_offset": -8,
    "font_size_subtitle": 65,
    "font_size_highlight": 68,
    "highlight_x_offset": 0,
    "video_path": None,
    "font": "System Default",
    "subtitles": [],
    "current_index": 0
}

# Compatible fonts that work across different systems
COMPATIBLE_FONTS = [
    "Arial",
    "Times-Roman",
    "Helvetica",
    "Courier",
    "Comic-Sans-MS",
    "Impact",
    "Verdana",
    "Tahoma",
    "Georgia",
    "Trebuchet-MS"
]

preset_file = "subtitle_gui_presets.json"

# === SRT PARSER ===
def parse_srt(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        contents = f.read()
    pattern = r"(\d+)\s+(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\s+([\s\S]*?)(?=\n\d+|\Z)"
    matches = re.findall(pattern, contents)
    subtitles = []
    for _, start, end, text in matches:
        subtitles.append({
            "start": srt_time_to_seconds(start),
            "end": srt_time_to_seconds(end),
            "text": text.replace('\n', ' ')
        })
    return subtitles

def srt_time_to_seconds(time_str):
    h, m, s_ms = time_str.split(':')
    s, ms = s_ms.split(',')
    return int(h)*3600 + int(m)*60 + int(s) + int(ms)/1000

# === PRESETS ===
def save_presets():
    with open(preset_file, "w") as f:
        json.dump(settings, f)
    print("📂 Presets saved!")

def load_presets():
    global settings
    try:
        with open(preset_file, "r") as f:
            loaded = json.load(f)
            settings.update(loaded)
        print("✅ Presets loaded!")
        sync_gui()
    except FileNotFoundError:
        print("⚠️ No presets found.")

# === SYNC ===
def sync_gui():
    sub_y_slider.set(settings["subtitle_y_px"])
    highlight_slider.set(settings["highlight_offset"])
    highlight_x_slider.set(settings["highlight_x_offset"])
    sub_font_slider.set(settings["font_size_subtitle"])
    highlight_font_slider.set(settings["font_size_highlight"])
    font_dropdown_var.set(settings["font"])

def render_preview():
    if not settings["video_path"] or not settings["subtitles"]:
        print("⚠️ Video or subtitles not loaded.")
        return

    sub = settings["subtitles"][settings["current_index"]]
    subtitle_text = sub["text"]
    start_time = sub["start"]
    end_time = sub["end"]
    duration = end_time - start_time

    clip = VideoFileClip(settings["video_path"]).subclipped(start_time, end_time)
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)

    highlight_word = subtitle_text.split()[-1]  # Highlight last word for now

    # Create TextClip with font if specified, otherwise use system default
    if settings["font"] == "System Default":
        base_subtitle = TextClip(
            text=subtitle_text,
            font_size=settings["font_size_subtitle"],
            color='white',
            stroke_color='black',
            stroke_width=5
        ).with_duration(duration).with_position(('center', settings["subtitle_y_px"]))
    else:
        try:
            base_subtitle = TextClip(
                text=subtitle_text,
                font=settings["font"],
                font_size=settings["font_size_subtitle"],
                color='white',
                stroke_color='black',
                stroke_width=5
            ).with_duration(duration).with_position(('center', settings["subtitle_y_px"]))
        except:
            # Fallback to system default if font fails
            print(f"⚠️ Font {settings['font']} failed, using system default")
            base_subtitle = TextClip(
                text=subtitle_text,
                font_size=settings["font_size_subtitle"],
                color='white',
                stroke_color='black',
                stroke_width=5
            ).with_duration(duration).with_position(('center', settings["subtitle_y_px"]))

    full_text = subtitle_text.upper()
    words = full_text.split()
    try:
        highlight_index = words.index(highlight_word.upper())
    except ValueError:
        highlight_index = len(words) - 1

    chars_before = sum(len(w) + 1 for w in words[:highlight_index])
    char_width = 35
    total_width = len(full_text) * char_width
    x_offset = (chars_before * char_width) - (total_width // 2) + settings["highlight_x_offset"]

    # Create highlighted word with same font logic
    if settings["font"] == "System Default":
        highlighted_word = TextClip(
            text=highlight_word,
            font_size=settings["font_size_highlight"],
            color='#FFD700',
            stroke_color='#FF6B35',
            stroke_width=5
        ).with_duration(duration / 2).with_start(duration / 4).with_position((540 + x_offset, settings["subtitle_y_px"] + settings["highlight_offset"]))
    else:
        try:
            highlighted_word = TextClip(
                text=highlight_word,
                font=settings["font"],
                font_size=settings["font_size_highlight"],
                color='#FFD700',
                stroke_color='#FF6B35',
                stroke_width=5
            ).with_duration(duration / 2).with_start(duration / 4).with_position((540 + x_offset, settings["subtitle_y_px"] + settings["highlight_offset"]))
        except:
            # Fallback to system default if font fails
            highlighted_word = TextClip(
                text=highlight_word,
                font_size=settings["font_size_highlight"],
                color='#FFD700',
                stroke_color='#FF6B35',
                stroke_width=5
            ).with_duration(duration / 2).with_start(duration / 4).with_position((540 + x_offset, settings["subtitle_y_px"] + settings["highlight_offset"]))

    final = CompositeVideoClip([vertical_clip, base_subtitle, highlighted_word], size=(1080, 1920))
    # Scale down the preview to fit 1080p monitor (max height ~900px to leave room for taskbar)
    preview_scale = 900 / 1920  # Scale factor to fit height
    preview_width = int(1080 * preview_scale)
    preview_height = int(1920 * preview_scale)
    preview_clip = final.resized((preview_width, preview_height))
    preview_clip.preview(fps=24, audio=False)

    clip.close()
    final.close()
    preview_clip.close()

def update_setting(var_name, value):
    settings[var_name] = int(value) if var_name.startswith("font_size") or "offset" in var_name or "y_px" in var_name else value

def update_font(value):
    settings["font"] = value

def open_video():
    file_path = filedialog.askopenfilename(filetypes=[("MP4 files", "*.mp4")])
    if file_path:
        settings["video_path"] = file_path
        print(f"📂 Loaded video: {file_path}")

def load_srt():
    file_path = filedialog.askopenfilename(filetypes=[("SRT Subtitle", "*.srt")])
    if file_path:
        settings["subtitles"] = parse_srt(file_path)
        settings["current_index"] = 0
        print(f"📝 Loaded {len(settings['subtitles'])} subtitles from {file_path}")

def next_sub():
    if settings["current_index"] < len(settings["subtitles"]) - 1:
        settings["current_index"] += 1
        start_preview_thread()

def prev_sub():
    if settings["current_index"] > 0:
        settings["current_index"] -= 1
        start_preview_thread()

def start_preview_thread():
    threading.Thread(target=render_preview).start()

# === GUI ===
root = tk.Tk()
root.title("Subtitle Positioning Tool")
root.geometry("420x700")

load_btn = tk.Button(root, text="🎥 Load Video", command=open_video)
load_btn.pack(pady=10)

load_srt_btn = tk.Button(root, text="📑 Load SRT Subtitles", command=load_srt)
load_srt_btn.pack(pady=5)

tk.Label(root, text="Subtitle Y Position").pack()
sub_y_slider = tk.Scale(root, from_=1000, to=1800, orient="horizontal",
                        command=lambda v: update_setting("subtitle_y_px", v))
sub_y_slider.set(settings["subtitle_y_px"])
sub_y_slider.pack()

tk.Label(root, text="Highlight Y Offset").pack()
highlight_slider = tk.Scale(root, from_=-100, to=100, orient="horizontal",
                            command=lambda v: update_setting("highlight_offset", v))
highlight_slider.set(settings["highlight_offset"])
highlight_slider.pack()

tk.Label(root, text="Highlight X Offset").pack()
highlight_x_slider = tk.Scale(root, from_=-300, to=300, orient="horizontal",
                              command=lambda v: update_setting("highlight_x_offset", v))
highlight_x_slider.set(settings["highlight_x_offset"])
highlight_x_slider.pack()

tk.Label(root, text="Subtitle Font Size").pack()
sub_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
                           command=lambda v: update_setting("font_size_subtitle", v))
sub_font_slider.set(settings["font_size_subtitle"])
sub_font_slider.pack()

tk.Label(root, text="Highlight Font Size").pack()
highlight_font_slider = tk.Scale(root, from_=30, to=100, orient="horizontal",
                                 command=lambda v: update_setting("font_size_highlight", v))
highlight_font_slider.set(settings["font_size_highlight"])
highlight_font_slider.pack()

tk.Label(root, text="Font").pack()
font_dropdown_var = tk.StringVar(value=settings["font"])
font_dropdown = tk.OptionMenu(root, font_dropdown_var, *AVAILABLE_FONTS, command=update_font)
font_dropdown.pack(pady=5)

preview_btn = tk.Button(root, text="▶️ Preview Clip", command=start_preview_thread)
preview_btn.pack(pady=10)

nav_frame = tk.Frame(root)
tk.Button(nav_frame, text="⏮️ Prev", command=prev_sub).pack(side="left", padx=5)
tk.Button(nav_frame, text="⏭️ Next", command=next_sub).pack(side="right", padx=5)
nav_frame.pack(pady=5)

save_btn = tk.Button(root, text="📂 Save Preset", command=save_presets)
save_btn.pack(pady=5)

load_preset_btn = tk.Button(root, text="📂 Load Preset", command=load_presets)
load_preset_btn.pack(pady=5)

root.mainloop()
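Note: the parse_srt regex above can be exercised on its own; a minimal sanity-check sketch (the sample string and helper name below are mine, not part of the repo):

import re

# Same pattern and time conversion as app2.py's parse_srt / srt_time_to_seconds.
pattern = r"(\d+)\s+(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})\s+([\s\S]*?)(?=\n\d+|\Z)"

sample = """1
00:00:30,000 --> 00:00:40,000
okay after we will

2
00:02:00,000 --> 00:02:10,000
find it difficult to believe we prepare
"""

def to_seconds(t):
    h, m, s_ms = t.split(":")
    s, ms = s_ms.split(",")
    return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000

for _, start, end, text in re.findall(pattern, sample):
    print(to_seconds(start), to_seconds(end), text.replace("\n", " ").strip())
# 30.0 40.0 okay after we will
# 120.0 130.0 find it difficult to believe we prepare
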
myvideo.srt (new file, 22 lines)
@@ -0,0 +1,22 @@
1
00:00:30,000 --> 00:00:40,000
okay after we will

2
00:02:00,000 --> 00:02:10,000
find it difficult to believe we prepare
to fight arm and arm the corny and royal
family

3
00:02:20,000 --> 00:02:30,000
hello me

4
00:02:30,000 --> 00:02:40,000
as expected

5
00:02:40,000 --> 00:02:50,000
gacha
@@ -112,7 +112,8 @@ def create_short_clip(video_path, start, end, subtitles, output_path):
                 word_width, _ = highlighted_word.size

                 word_x = current_x + (word_width / 2)
-                highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x -125, subtitle_y_px))
+                highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x -125
+                    , subtitle_y_px))
                 clips.append(highlighted_word)

                 current_x += word_width + 20  # Add spacing between words
sub2.srt (new file, 4 lines)
@@ -0,0 +1,4 @@
1
00:00:00,000 --> 00:00:08,250
Yeah, yeah, level she's 24.
shorts_generator.py (deleted, 157 lines)
@@ -1,157 +0,0 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel

def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i+chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks

def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles

def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    # Convert to vertical 9:16
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)

    clips = [vertical_clip]
    for (s, e, text) in subtitles:
        try:
            # Ensure subtitle timing doesn't exceed video duration
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)

            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            # Opus Clip style professional subtitles
            words = text.strip().split()
            if not words:
                continue

            # Break text into smaller chunks for better readability (max 3-4 words per line)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            # Position subtitles in the center-bottom area (Opus style)
            subtitle_position = 0.75

            # Create subtitle for each chunk with smooth transitions
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)

            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)

                chunk_words = chunk_text.split()

                # Base subtitle with Opus-style design (bold white text with strong outline)
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold - excellent for subtitles
                    font_size=65,  # Larger, chunkier text
                    color='white',
                    stroke_color='black',
                    stroke_width=5  # Thicker outline for better readability
                )
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_position), relative=True)
                clips.append(base_subtitle)

                # Opus-style word-by-word highlighting (yellow/orange like Opus)
                word_duration = chunk_duration / len(chunk_words)

                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    # Opus-style highlighted word (vibrant yellow/orange)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold for consistency
                        font_size=68,  # Slightly larger for highlight effect
                        color='#FFD700',  # Gold/yellow like Opus Clip
                        stroke_color='#FF6B35',  # Orange outline for pop
                        stroke_width=5
                    )

                    # Calculate precise word positioning within the chunk
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)

                    # More accurate character width calculation for Arial Bold
                    char_width = 35  # Adjusted for larger, bolder font
                    total_chunk_width = len(chunk_text) * char_width
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)

                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((540 + word_x_offset, subtitle_position), relative=(False, True))
                    clips.append(highlighted_word)

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    # 💥 Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()

def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)

    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)

if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])
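Note: detect_loud_moments scores each chunk as 20·log10(mean per-sample L2 amplitude). A self-contained check with synthetic audio (all values below are mine, chosen to show why the caller passes threshold_db=-30):

import numpy as np

# Synthetic stereo "audio": quiet noise with one loud burst, 44.1 kHz, 15 s.
fps = 44100
audio = np.random.normal(0, 0.01, size=(fps * 15, 2))
audio[fps * 5:fps * 10] *= 50  # loud middle 5 seconds

volume = np.linalg.norm(audio, axis=1)         # per-sample L2 across channels
chunk_size = 5 * fps                           # 5-second chunks, as in the script
for i in range(0, len(volume), chunk_size):
    chunk = volume[i:i + chunk_size]
    db = 20 * np.log10(np.mean(chunk) + 1e-10)  # same formula as detect_loud_moments
    print(f"chunk @ {i / fps:.0f}s: {db:.1f} dB")
# Roughly -38 dB for the quiet chunks and -4 dB for the loud one,
# so a -30 dB threshold separates them cleanly.
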
@@ -2,9 +2,12 @@ import os
 import math
 import tempfile
 import moviepy as mp
 import speech_recognition as sr
 import tkinter as tk
 from tkinter import filedialog, messagebox, ttk
 from pydub import AudioSegment
 from pydub.silence import split_on_silence
 import threading
+from faster_whisper import WhisperModel
+
+
 def format_time(seconds):
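Note: format_time's body falls outside this hunk. A conventional SRT timestamp formatter would look roughly like the sketch below (my reconstruction, not necessarily this repo's implementation):

def format_time(seconds: float) -> str:
    # SRT timestamps are HH:MM:SS,mmm with a comma before the milliseconds.
    ms = int(round(seconds * 1000))
    h, rem = divmod(ms, 3_600_000)
    m, rem = divmod(rem, 60_000)
    s, ms = divmod(rem, 1_000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

print(format_time(125.5))  # 00:02:05,500
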
@@ -44,52 +47,71 @@ def write_srt(subtitles, output_path):
         f.write(f"{wrap_text(sub['text'])}\n\n")


-def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, chunk_duration=10):
+def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None, model_size="base", language="auto"):
     try:
         print("📽️ Loading video file...")
         video = mp.VideoFileClip(video_path)
         audio = video.audio

         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio_file:
             temp_audio_path = temp_audio_file.name
         print("🔊 Extracting audio...")
         audio.write_audiofile(temp_audio_path, logger=None)

-        recognizer = sr.Recognizer()
+        print(f"🤖 Loading Whisper model ({model_size})...")
+        # Initialize Whisper model - much more accurate than Google Speech Recognition
+        model = WhisperModel(model_size, device="cpu", compute_type="int8")
+
+        print("🎯 Transcribing with Whisper AI...")
+        # Transcribe the entire audio file at once - Whisper handles timing automatically
+        segments, info = model.transcribe(
+            temp_audio_path,
+            language=None if language == "auto" else language,
+            word_timestamps=True,
+            vad_filter=True,  # Voice Activity Detection for better accuracy
+            vad_parameters=dict(min_silence_duration_ms=500)
+        )
+
+        print(f"🌍 Detected language: {info.language} (confidence: {info.language_probability:.2f})")

         subtitles = []
+        segment_list = list(segments)  # Convert generator to list for progress tracking
+
+        print(f"📝 Processing {len(segment_list)} speech segments...")
+
+        for i, segment in enumerate(segment_list):
+            # Whisper provides precise timing and text
+            start_time = segment.start
+            end_time = segment.end
+            text = segment.text.strip()
+
+            if text and len(text) > 0:
+                subtitles.append({
+                    "start": start_time,
+                    "end": end_time,
+                    "text": text
+                })
+                print(f"✅ Segment {i+1}: '{text[:50]}...' ({start_time:.1f}s - {end_time:.1f}s)")
+
+            # Update progress bar
+            if progress_callback:
+                progress_callback(i + 1, len(segment_list))

-        with sr.AudioFile(temp_audio_path) as source:
-            audio_duration = source.DURATION
-            num_chunks = math.ceil(audio_duration / chunk_duration)
-
-            for i in range(num_chunks):
-                start_time = i * chunk_duration
-                end_time = min((i + 1) * chunk_duration, audio_duration)
-
-                source_offset = start_time
-                duration = end_time - start_time
-
-                audio_data = recognizer.record(source, offset=source_offset, duration=duration)
-
-                try:
-                    text = recognizer.recognize_google(audio_data)
-                    subtitles.append({
-                        "start": start_time,
-                        "end": end_time,
-                        "text": text
-                    })
-                except sr.UnknownValueError:
-                    pass
-                except sr.RequestError as e:
-                    print(f"API error: {e}")
-
-                # Update progress bar
-                if progress_callback:
-                    progress_callback(i + 1, num_chunks)
-
-        os.remove(temp_audio_path)
+        # Clean up
+        if os.path.exists(temp_audio_path):
+            os.remove(temp_audio_path)
+
+        if video:
+            video.close()
+        if audio:
+            audio.close()
+
+        print(f"🎯 Generated {len(subtitles)} subtitle segments with Whisper AI")
         write_srt(subtitles, srt_output_path)
         return True

     except Exception as e:
-        print(f"Error: {e}")
+        print(f"❌ Error: {e}")
         return False
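Note: stripped of the GUI plumbing, the new transcription path boils down to a few lines of faster-whisper, mirroring the calls in the hunk above (the audio path here is a placeholder):

from faster_whisper import WhisperModel

model = WhisperModel("base", device="cpu", compute_type="int8")
segments, info = model.transcribe(
    "audio.wav",                  # placeholder path
    language=None,                # None = auto-detect, as the "auto" option does
    word_timestamps=True,
    vad_filter=True,
    vad_parameters=dict(min_silence_duration_ms=500),
)
print(info.language, f"{info.language_probability:.2f}")
for seg in segments:              # segments is a generator; iterate once
    print(f"{seg.start:.1f}s-{seg.end:.1f}s: {seg.text.strip()}")
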
@@ -98,7 +120,7 @@ def transcribe_video_to_srt(video_path, srt_output_path, progress_callback=None,
 def select_file_and_generate():
     video_path = filedialog.askopenfilename(
         title="Select a video file",
-        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv")]
+        filetypes=[("Video files", "*.mp4 *.mov *.avi *.mkv *.webm *.flv")]
     )

     if not video_path:
@@ -113,43 +135,127 @@ def select_file_and_generate():
     if not srt_output_path:
         return

+    # Disable button during processing
+    select_button.config(state="disabled", text="Processing...")
     progress_bar["value"] = 0
-    progress_label.config(text="Starting...")
+    progress_label.config(text="Starting speech recognition...")
+    status_label.config(text="🔄 Processing video...", fg="blue")
     root.update()

     def update_progress(current, total):
         percent = (current / total) * 100
         progress_bar["value"] = percent
-        progress_label.config(text=f"Progress: {current}/{total} chunks")
+        progress_label.config(text=f"Processing: {current}/{total} segments")
         root.update()

-    success = transcribe_video_to_srt(video_path, srt_output_path, progress_callback=update_progress)
+    def process_video():
+        try:
+            model_size = model_size_var.get()
+            language = language_var.get()
+
+            success = transcribe_video_to_srt(
+                video_path,
+                srt_output_path,
+                progress_callback=update_progress,
+                model_size=model_size,
+                language=language
+            )

-    if success:
-        messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}")
-    else:
-        messagebox.showerror("Error", "Something went wrong. See console for details.")
+            if success:
+                status_label.config(text="✅ Subtitles generated successfully!", fg="green")
+                messagebox.showinfo("Success", f"Subtitles saved to:\n{srt_output_path}\n\nOpen with your subtitle_generator2.py for editing!")
+            else:
+                status_label.config(text="❌ Error occurred during processing", fg="red")
+                messagebox.showerror("Error", "Something went wrong. Check console for details.")

-    progress_label.config(text="Done")
+        except Exception as e:
+            status_label.config(text="❌ Unexpected error occurred", fg="red")
+            messagebox.showerror("Error", f"Unexpected error: {e}")
+        finally:
+            # Re-enable button
+            select_button.config(state="normal", text="📂 Select Video and Generate Subtitles")
+            progress_label.config(text="Done")
+
+    # Run in separate thread to prevent GUI freezing
+    thread = threading.Thread(target=process_video)
+    thread.daemon = True
+    thread.start()


 # GUI Setup
 root = tk.Tk()
-root.title("Auto Subtitle Generator (.srt) with Progress")
+root.title("🎬 Auto Subtitle Generator - Speech to SRT")
 root.geometry("500x350")

 frame = tk.Frame(root, padx=20, pady=20)
-frame.pack()
+frame.pack(fill="both", expand=True)

-label = tk.Label(frame, text="Select a video file to auto-generate subtitles (SRT):")
-label.pack(pady=(0, 10))
+# Title
+title_label = tk.Label(frame, text="🎬 Auto Subtitle Generator", font=("Arial", 16, "bold"))
+title_label.pack(pady=(0, 10))

-select_button = tk.Button(frame, text="Select Video and Generate Subtitles", command=select_file_and_generate)
-select_button.pack(pady=5)
+subtitle_label = tk.Label(frame, text="Extract speech from video and create perfectly timed SRT subtitles", font=("Arial", 10))
+subtitle_label.pack(pady=(0, 20))

-progress_bar = ttk.Progressbar(frame, length=300, mode="determinate")
-progress_bar.pack(pady=(15, 5))
+# Settings Frame
+settings_frame = tk.LabelFrame(frame, text="Whisper AI Settings", padx=10, pady=10)
+settings_frame.pack(fill="x", pady=(0, 15))

-progress_label = tk.Label(frame, text="Idle")
+# Model Size Selection
+model_frame = tk.Frame(settings_frame)
+model_frame.pack(fill="x", pady=(0, 10))
+
+tk.Label(model_frame, text="🧠 Model Size:", font=("Arial", 9)).pack(side="left")
+model_size_var = tk.StringVar(value="base")
+model_dropdown = ttk.Combobox(model_frame, textvariable=model_size_var,
+                              values=["tiny", "base", "small", "medium", "large"],
+                              state="readonly", width=12)
+model_dropdown.pack(side="right")
+
+# Language Selection
+language_frame = tk.Frame(settings_frame)
+language_frame.pack(fill="x", pady=(0, 10))
+
+tk.Label(language_frame, text="🌍 Language:", font=("Arial", 9)).pack(side="left")
+language_var = tk.StringVar(value="auto")
+language_dropdown = ttk.Combobox(language_frame, textvariable=language_var,
+                                 values=["auto", "en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"],
+                                 state="readonly", width=12)
+language_dropdown.pack(side="right")
+
+# Help text
+help_label = tk.Label(
+    settings_frame,
+    text=" 💡 Base model recommended for best speed/accuracy balance\n 🔍 Auto language detection works for most videos",
+    font=("Arial", 8),
+    fg="gray"
+)
+help_label.pack(anchor="w")
+
+# Main Action Button
+select_button = tk.Button(
+    frame,
+    text="📂 Select Video and Generate Subtitles",
+    command=select_file_and_generate,
+    font=("Arial", 11, "bold"),
+    bg="#4CAF50",
+    fg="white",
+    pady=8
+)
+select_button.pack(pady=15, fill="x")
+
+# Progress Section
+progress_frame = tk.LabelFrame(frame, text="Progress", padx=10, pady=10)
+progress_frame.pack(fill="x", pady=(0, 10))
+
+progress_bar = ttk.Progressbar(progress_frame, length=400, mode="determinate")
+progress_bar.pack(fill="x", pady=(0, 5))
+
+progress_label = tk.Label(progress_frame, text="Ready to process video", font=("Arial", 9))
+progress_label.pack()
+
+# Status Label
+status_label = tk.Label(frame, text="💡 Tip: Use generated SRT files with subtitle_generator2.py for advanced editing!", font=("Arial", 9), fg="blue")
+status_label.pack(pady=(10, 0))

 root.mainloop()
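Note: process_video and update_progress touch Tk widgets (progress_bar, status_label, root.update()) from a worker thread, and Tkinter does not guarantee that is safe. A common alternative is to marshal widget updates onto the main loop with root.after; a sketch reusing this script's names:

def update_progress(current, total):
    def apply():
        progress_bar["value"] = (current / total) * 100
        progress_label.config(text=f"Processing: {current}/{total} segments")
    # Schedule the widget update on the Tk main loop instead of
    # touching widgets directly from the worker thread.
    root.after(0, apply)
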
@@ -1,3 +0,0 @@
1
00:00:00,000 --> 00:00:02,500
You're running
test_whisper.py (new file, 37 lines)
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
"""
Test script to verify faster_whisper integration
"""

import os
from faster_whisper import WhisperModel

def test_whisper_setup():
    """Test if faster_whisper is working correctly"""
    print("🧪 Testing faster_whisper setup...")

    try:
        # Try to initialize the smallest model
        print("📥 Loading tiny model (this might take a moment on first run)...")
        model = WhisperModel("tiny")
        print("✅ Successfully loaded Whisper tiny model!")

        # Check available models
        available_models = ["tiny", "base", "small", "medium", "large"]
        print(f"🎯 Available models: {', '.join(available_models)}")

        # Test basic functionality with a short audio
        print("🔍 Whisper model ready for transcription!")

        return True

    except Exception as e:
        print(f"❌ Error: {e}")
        return False

if __name__ == "__main__":
    if test_whisper_setup():
        print("\n🎉 faster_whisper is ready to use!")
        print("💡 Your subtitle generator now has much better speech recognition!")
    else:
        print("\n⚠️ There might be an issue with faster_whisper setup")