Initial commit: ShortGenerator project setup

- Add shorts_generator.py and shorts_generator2.py (main scripts)
- Add README.md with project documentation
- Add requirements.txt with Python dependencies
- Add .gitignore to exclude video files and other unnecessary files
Author: klop51
Date: 2025-08-05 00:34:23 +02:00
Commit: 7536d7fb49
5 changed files with 451 additions and 0 deletions

.gitignore (new file)
@@ -0,0 +1,61 @@
# Video files (exclude large video files from version control)
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Output folders
shorts/
output/
temp/

# Font zip files
*.zip

# Logs
*.log

README.md (new file)
@@ -0,0 +1,83 @@
# ShortGenerator

A Python-based tool for automatically generating short-form videos from longer video content, optimized for platforms like TikTok, Instagram Reels, and YouTube Shorts.

## Features

- **Automatic loud moment detection**: Identifies exciting parts of videos using audio analysis
- **AI-powered transcription**: Uses Whisper for accurate speech-to-text conversion
- **Dynamic subtitles**: Creates engaging, word-by-word highlighted subtitles in the style of popular content creators
- **Vertical format optimization**: Automatically converts landscape videos to 9:16 aspect ratio
- **Batch processing**: Generate multiple shorts from a single video

## Requirements

- Python 3.8+
- FFmpeg
- Required Python packages (install with `pip install -r requirements.txt`):
  - moviepy
  - faster-whisper
  - numpy
## Installation

1. Clone the repository:

   ```bash
   git clone https://git.ptbox.org/klop51/ShortGenerator.git
   cd ShortGenerator
   ```

2. Install dependencies:

   ```bash
   pip install -r requirements.txt
   ```

3. Ensure FFmpeg is installed and accessible from your PATH (running `ffmpeg -version` in a terminal should print version information)
## Usage

### Basic usage

```bash
python shorts_generator2.py your_video.mp4
```

This will:

1. Analyze your video for loud/exciting moments
2. Generate up to 3 short clips (5 seconds each by default)
3. Add dynamic subtitles with word-by-word highlighting
4. Save the shorts in the `shorts/` folder
### Customization

You can modify the script parameters (see the example after this list):

- `max_clips`: Number of shorts to generate
- `chunk_duration`: Length of each short clip
- `threshold_db`: Audio loudness threshold for moment detection
- `output_folder`: Where to save generated shorts
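
For example, here is a minimal sketch of driving the generator from Python with non-default settings. `max_clips` and `output_folder` are parameters of `generate_shorts`; `chunk_duration` and `threshold_db` are currently set inside the script where `detect_loud_moments` is called, so changing them means editing that call.

```python
from shorts_generator2 import generate_shorts

# Produce up to 5 shorts and write them to a custom folder
generate_shorts("your_video.mp4", max_clips=5, output_folder="my_shorts")
```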
## Output

Generated shorts:

- 1080x1920 resolution (9:16 aspect ratio)
- MP4 format with the H.264 codec
- Dynamic subtitles with word-by-word highlighting effects
- Automatically cropped and centered from the original video

## File Structure

```
ShortGenerator/
├── shorts_generator2.py   # Main script (latest version)
├── shorts_generator.py    # Legacy version
├── fonts/                 # Font files for subtitles
├── shorts/                # Generated short videos (gitignored)
└── README.md              # This file
```
## Contributing

Feel free to submit issues and enhancement requests!

## License

This project is open source. Please check the license file for details.

requirements.txt (new file)
@@ -0,0 +1,3 @@
moviepy>=2.0.0  # scripts use the MoviePy 2.x API (subclipped, resized, with_position)
faster-whisper>=0.10.0
numpy>=1.21.0

shorts_generator.py (new file)
@@ -0,0 +1,157 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")
    # Convert to vertical 9:16, cropping around the center of the resized frame
    vertical_clip = clip.resized(height=1920)
    vertical_clip = vertical_clip.cropped(width=1080, x_center=vertical_clip.w / 2)
    clips = [vertical_clip]
    for (s, e, text) in subtitles:
        try:
            # Ensure subtitle timing doesn't exceed video duration
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue
            # Opus Clip style professional subtitles
            words = text.strip().split()
            if not words:
                continue
            # Break text into smaller chunks for better readability (max 3 words or ~25 characters per line)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            # Position subtitles in the center-bottom area (Opus style), as a fraction of frame height
            subtitle_position = 0.75
            # Create subtitle for each chunk with smooth transitions
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()
                # Base subtitle with Opus-style design (bold white text with strong outline)
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold - excellent for subtitles
                    font_size=65,  # Larger, chunkier text
                    color='white',
                    stroke_color='black',
                    stroke_width=5  # Thicker outline for better readability
                )
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_position), relative=True)
                clips.append(base_subtitle)
                # Opus-style word-by-word highlighting (yellow/orange like Opus)
                word_duration = chunk_duration / len(chunk_words)
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    # Opus-style highlighted word (vibrant yellow/orange)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold for consistency
                        font_size=68,  # Slightly larger for highlight effect
                        color='#FFD700',  # Gold/yellow like Opus Clip
                        stroke_color='#FF6B35',  # Orange outline for pop
                        stroke_width=5
                    )
                    # Estimate the word's horizontal offset within the chunk from character counts
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)
                    # Approximate character width for Lato Bold at this size
                    char_width = 35
                    total_chunk_width = len(chunk_text) * char_width
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)
                    # X in pixels from the frame center (540); Y converted from the 0.75 frame-height fraction to pixels
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((540 + word_x_offset, int(subtitle_position * 1920)))
                    clips.append(highlighted_word)
            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue
    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    # 💥 Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])

shorts_generator2.py (new file)
@@ -0,0 +1,147 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")
    # Convert to vertical 9:16, cropping around the center of the resized frame
    vertical_clip = clip.resized(height=1920)
    vertical_clip = vertical_clip.cropped(width=1080, x_center=vertical_clip.w / 2)
    clips = [vertical_clip]
    subtitle_y_px = 1550  # Fixed Y position for subtitles
    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue
            words = text.strip().split()
            if not words:
                continue
            # Split into small readable chunks (max 2 words or ~25 characters)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()
                # Base subtitle
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)
                # Highlighted words, aligned against the measured width of the base subtitle
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is the center X of the 1080 px frame
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size
                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x - 8, subtitle_y_px))
                    clips.append(highlighted_word)
                    current_x += word_width + 20  # Add spacing between words
            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue
    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    # Force close readers to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator2.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])