ShortGenerator/subtitle_extrator.py
klop51 5ce79f084d Refactor subtitle rendering and highlight functionality
- Improved the rendering of subtitles in preview mode to highlight the currently selected word.
- Enhanced export functionality to create highlights for all words in sequence using preset settings.
- Updated the GUI to reflect changes in highlight word selection and preset slot navigation.
- Added error handling for subtitle creation and improved logging for better debugging.
- Introduced a new script for extracting subtitles and generating short clips based on loud moments in the audio.
- Updated JSON preset files to remove obsolete fields and ensure compatibility with the new features.
2025-08-07 22:25:56 +02:00

158 lines
7.1 KiB
Python

import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel
def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10, sample_rate=44100):
    """Scan the video's audio track and return windows whose mean level exceeds a threshold.

    Parameters:
        video_path: path to the source video file.
        chunk_duration: analysis window size in seconds.
        threshold_db: dB level a window's mean must exceed to be kept.
        sample_rate: audio sampling rate used for analysis (was hard-coded 44100).

    Returns:
        List of (start, end) tuples in seconds, each end clipped to the clip duration.
    """
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    loud_chunks = []
    try:
        audio = clip.audio.to_soundarray(fps=sample_rate)
        # Per-sample magnitude across channels.
        volume = np.linalg.norm(audio, axis=1)
        chunk_size = int(chunk_duration * sample_rate)
        max_db = -float('inf')
        for i in range(0, len(volume), chunk_size):
            chunk = volume[i:i + chunk_size]
            if chunk.size == 0:
                # Guard against an empty trailing slice (np.mean of an empty
                # array yields nan with a runtime warning).
                continue
            # 1e-10 avoids log10(0) on silent chunks.
            db = 20 * np.log10(np.mean(chunk) + 1e-10)
            max_db = max(max_db, db)
            if db > threshold_db:
                start = i / sample_rate
                loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
        print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
        print(f"📈 Found {len(loud_chunks)} loud moments")
    finally:
        # Release the reader even if audio extraction/analysis raises
        # (the original leaked the clip on any exception).
        clip.close()
    return loud_chunks
def transcribe_and_extract_subtitles(video_path, start, end):
    """Transcribe the video and return subtitles overlapping the [start, end] window.

    Parameters:
        video_path: path to the source video file.
        start, end: window boundaries in seconds (absolute video time).

    Returns:
        List of (rel_start, rel_end, text) tuples with times relative to
        ``start`` and clamped to the window, as consumed by create_short_clip.
    """
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    # NOTE(review): the whole file is transcribed on every call; if this is
    # invoked once per clip, consider transcribing once and reusing segments.
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        # Keep any segment that OVERLAPS the window, not only those that
        # start inside it — the original dropped speech already in progress
        # at the cut point. Clamp times so they stay within the clip.
        if segment.end > start and segment.start < end:
            rel_start = max(segment.start, start) - start
            rel_end = min(segment.end, end) - start
            subtitles.append((rel_start, rel_end, segment.text))
    return subtitles
def create_short_clip(video_path, start, end, subtitles, output_path):
    """Cut [start, end] from the video, convert to a vertical 1080x1920 clip,
    burn in chunked, word-highlighted subtitles, and write the result.

    Parameters:
        video_path: source video file path.
        start, end: cut boundaries in seconds (absolute video time).
        subtitles: iterable of (start, end, text) tuples with times relative
            to the cut (as produced by transcribe_and_extract_subtitles).
        output_path: destination .mp4 path.
    """
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")
    # Convert to vertical 9:16: scale to full height, crop to a centered 1080px column.
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)
    clips = [vertical_clip]
    for (s, e, text) in subtitles:
        try:
            # Ensure subtitle timing doesn't exceed video duration.
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue
            # Opus Clip style professional subtitles.
            words = text.strip().split()
            if not words:
                continue
            # Break text into smaller chunks for better readability
            # (max 3 words or ~25 characters per on-screen line).
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            # Position subtitles in the center-bottom area (Opus style);
            # 0.75 is a relative y position (fraction of frame height).
            subtitle_position = 0.75
            # Divide the subtitle's time span evenly across its chunks.
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()
                # Base subtitle with Opus-style design (bold white text with strong outline).
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold - excellent for subtitles
                    font_size=65,  # Larger, chunkier text
                    color='white',
                    stroke_color='black',
                    stroke_width=5  # Thicker outline for better readability
                )
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_position), relative=True)
                clips.append(base_subtitle)
                # Opus-style word-by-word highlighting (yellow/orange like Opus).
                word_duration = chunk_duration / len(chunk_words)
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    # The 0.8 factor ends each highlight early so consecutive
                    # highlighted words don't visually overlap.
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    # Opus-style highlighted word (vibrant yellow/orange).
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold for consistency
                        font_size=68,  # Slightly larger for highlight effect
                        color='#FFD700',  # Gold/yellow like Opus Clip
                        stroke_color='#FF6B35',  # Orange outline for pop
                        stroke_width=5
                    )
                    # Calculate precise word positioning within the chunk:
                    # chars_before counts letters plus one space per preceding word.
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)
                    # NOTE(review): fixed-width estimate; the comment below says
                    # Arial but the font above is Lato — verify alignment visually.
                    # More accurate character width calculation for Arial Bold.
                    char_width = 35  # Adjusted for larger, bolder font
                    total_chunk_width = len(chunk_text) * char_width
                    # 540 is the horizontal center of the 1080px-wide frame;
                    # x is absolute pixels, y stays relative (False, True).
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((540 + word_x_offset, subtitle_position), relative=(False, True))
                    clips.append(highlighted_word)
            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            # Best-effort: a bad subtitle is logged and skipped, not fatal.
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue
    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    # 💥 Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()
def generate_shorts(video_path, max_clips=3, output_folder="shorts", threshold_db=-30):
    """Generate up to ``max_clips`` subtitled vertical shorts from the loudest moments.

    Parameters:
        video_path: path to the source video file.
        max_clips: maximum number of shorts to produce.
        output_folder: directory for the output files (created if missing).
        threshold_db: loudness threshold passed to detect_loud_moments
            (previously hard-coded to -30; default preserves old behavior).
    """
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=threshold_db)
    # Moments come back in chronological order; take the first max_clips.
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # Derive the script name from argv[0] instead of the hard-coded
        # (and out-of-date) "shorts_generator.py" in the usage message.
        print(f"Usage: python {os.path.basename(sys.argv[0])} your_video.mp4")
    else:
        generate_shorts(sys.argv[1])