"""Generate vertical short clips from a long video.

Finds the loudest moments, transcribes them with faster-whisper, and renders
9:16 clips with Opus Clip-style word-highlighted subtitles.
Requires MoviePy v2 (the `subclipped`/`resized`/`with_*` API) and ffmpeg.
"""

import os
import sys

import numpy as np
from faster_whisper import WhisperModel
from moviepy import CompositeVideoClip, TextClip, VideoFileClip


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=-30):
    """Return (start, end) pairs for chunks whose mean level exceeds threshold_db (dBFS)."""
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    # Per-sample magnitude; handle both mono (1-D) and stereo (2-D) arrays.
    volume = np.abs(audio) if audio.ndim == 1 else np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float("inf")
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        # Mean amplitude in dBFS (always <= 0); the epsilon avoids log10(0) on silence.
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    """Transcribe the file and keep segments that start inside [start, end]."""
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    # Note: the model is reloaded on every call; hoist it out if you process many clips.
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            # Store timestamps relative to the clip, not the source video.
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    # Convert to vertical 9:16: scale to full height, then center-crop the width.
    # The crop must be centered on the *resized* clip's width, not the original's.
    resized = clip.resized(height=1920)
    vertical_clip = resized.cropped(width=1080, x_center=resized.w / 2)

    clips = [vertical_clip]

    for (s, e, text) in subtitles:
        try:
            # Clamp subtitle timing to the clip's duration.
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            words = text.strip().split()
            if not words:
                continue

            # Break text into short chunks (max 3 words or 25 chars per line) for readability.
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(" ".join(current_chunk)) > 25:
                    chunks.append(" ".join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(" ".join(current_chunk))

            # Position subtitles in the center-bottom area (Opus style),
            # as a fraction of the frame height.
            subtitle_position = 0.75
            subtitle_y = int(subtitle_position * 1920)  # same position in absolute pixels

            # Show each chunk for an equal share of the subtitle's duration.
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()

                # Base subtitle: bold white text with a strong black outline.
                # The font path is Windows-specific; adjust it for your system.
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font="C:/Windows/Fonts/LatoWeb-Bold.ttf",  # Lato Bold
                    font_size=65,
                    color="white",
                    stroke_color="black",
                    stroke_width=5,  # thick outline for readability
                )
                base_subtitle = (
                    base_subtitle.with_start(chunk_start)
                    .with_end(chunk_end)
                    .with_position(("center", subtitle_position), relative=True)
                )
                clips.append(base_subtitle)

                # Opus-style word-by-word highlighting in gold/orange.
                word_duration = chunk_duration / len(chunk_words)
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    # End the highlight early (80%) so consecutive words don't overlap.
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    highlighted_word = TextClip(
                        text=word.upper(),
                        font="C:/Windows/Fonts/LatoWeb-Bold.ttf",  # Lato Bold for consistency
                        font_size=68,  # slightly larger for the highlight effect
                        color="#FFD700",  # gold/yellow like Opus Clip
                        stroke_color="#FF6B35",  # orange outline for pop
                        stroke_width=5,
                    )

                    # Approximate the word's x offset from a fixed per-character width.
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)
                    char_width = 35  # rough estimate for Lato Bold at this size
                    total_chunk_width = len(chunk_text) * char_width
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)

                    # with_position takes a single `relative` flag for both axes, so use
                    # absolute pixel coordinates (540 is the horizontal center of 1080).
                    highlighted_word = (
                        highlighted_word.with_start(word_start)
                        .with_end(word_end)
                        .with_position((540 + word_x_offset, subtitle_y))
                    )
                    clips.append(highlighted_word)

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    # 💥 Force-close the readers to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    loud_moments = detect_loud_moments(video_path, threshold_db=-30)
    # Take the first few loud moments in chronological order.
    selected = loud_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])