Initial commit: ShortGenerator project setup

- Add shorts_generator.py and shorts_generator2.py (main scripts)
- Add README.md with project documentation
- Add requirements.txt with Python dependencies
- Add .gitignore to exclude video files and other unnecessary files
Author: klop51
Date: 2025-08-05 00:34:23 +02:00
Commit: 7536d7fb49
5 changed files with 451 additions and 0 deletions

.gitignore (new file)
@@ -0,0 +1,61 @@
# Video files (exclude large video files from version control)
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Output folders
shorts/
output/
temp/

# Font zip files
*.zip

# Logs
*.log

README.md (new file)
@@ -0,0 +1,83 @@
# ShortGenerator

A Python-based tool for automatically generating short-form videos from longer video content, optimized for platforms like TikTok, Instagram Reels, and YouTube Shorts.

## Features

- **Automatic loud moment detection**: Identifies exciting parts of videos using audio analysis
- **AI-powered transcription**: Uses Whisper for accurate speech-to-text conversion
- **Dynamic subtitles**: Creates engaging, word-by-word highlighted subtitles in the style of popular content creators
- **Vertical format optimization**: Automatically converts landscape videos to 9:16 aspect ratio
- **Batch processing**: Generate multiple shorts from a single video

## Requirements

- Python 3.8+
- FFmpeg
- Required Python packages (install with `pip install -r requirements.txt`):
  - moviepy
  - faster-whisper
  - numpy
## Installation

1. Clone the repository:

   ```bash
   git clone https://git.ptbox.org/klop51/ShortGenerator.git
   cd ShortGenerator
   ```

2. Install dependencies:

   ```bash
   pip install -r requirements.txt
   ```

3. Ensure FFmpeg is installed and accessible from your PATH (running `ffmpeg -version` in a terminal should print version information)
## Usage

### Basic usage

```bash
python shorts_generator2.py your_video.mp4
```

This will:

1. Analyze your video for loud/exciting moments
2. Generate up to 3 short clips (5 seconds each by default)
3. Add dynamic subtitles with word-by-word highlighting
4. Save the shorts in the `shorts/` folder
### Customization

You can modify the script parameters (see the example after this list):

- `max_clips`: Number of shorts to generate
- `chunk_duration`: Length of each short clip
- `threshold_db`: Audio loudness threshold for moment detection
- `output_folder`: Where to save generated shorts
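
For example, here is a minimal sketch of driving the generator from Python with non-default settings. `max_clips` and `output_folder` are parameters of `generate_shorts`; `chunk_duration` and `threshold_db` are currently set inside the script where `detect_loud_moments` is called, so changing them means editing that call.

```python
from shorts_generator2 import generate_shorts

# Produce up to 5 shorts and write them to a custom folder
generate_shorts("your_video.mp4", max_clips=5, output_folder="my_shorts")
```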
## Output

Generated shorts:

- 1080x1920 resolution (9:16 aspect ratio)
- MP4 format with the H.264 codec
- Dynamic subtitles with word-by-word highlighting effects
- Automatically cropped and centered from the original video

## File Structure

```
ShortGenerator/
├── shorts_generator2.py   # Main script (latest version)
├── shorts_generator.py    # Legacy version
├── fonts/                 # Font files for subtitles
├── shorts/                # Generated short videos (gitignored)
└── README.md              # This file
```
## Contributing

Feel free to submit issues and enhancement requests!

## License

This project is open source. Please check the license file for details.

requirements.txt (new file)
@@ -0,0 +1,3 @@
moviepy>=2.0.0  # scripts use the MoviePy 2.x API (subclipped, resized, with_position)
faster-whisper>=0.10.0
numpy>=1.21.0

shorts_generator.py (new file)
@@ -0,0 +1,157 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")
    # Convert to vertical 9:16, cropping around the center of the resized frame
    vertical_clip = clip.resized(height=1920)
    vertical_clip = vertical_clip.cropped(width=1080, x_center=vertical_clip.w / 2)
    clips = [vertical_clip]
    for (s, e, text) in subtitles:
        try:
            # Ensure subtitle timing doesn't exceed video duration
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue
            # Opus Clip style professional subtitles
            words = text.strip().split()
            if not words:
                continue
            # Break text into smaller chunks for better readability (max 3 words or ~25 characters per line)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            # Position subtitles in the center-bottom area (Opus style), as a fraction of frame height
            subtitle_position = 0.75
            # Create subtitle for each chunk with smooth transitions
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()
                # Base subtitle with Opus-style design (bold white text with strong outline)
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold - excellent for subtitles
                    font_size=65,  # Larger, chunkier text
                    color='white',
                    stroke_color='black',
                    stroke_width=5  # Thicker outline for better readability
                )
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_position), relative=True)
                clips.append(base_subtitle)
                # Opus-style word-by-word highlighting (yellow/orange like Opus)
                word_duration = chunk_duration / len(chunk_words)
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    # Opus-style highlighted word (vibrant yellow/orange)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold for consistency
                        font_size=68,  # Slightly larger for highlight effect
                        color='#FFD700',  # Gold/yellow like Opus Clip
                        stroke_color='#FF6B35',  # Orange outline for pop
                        stroke_width=5
                    )
                    # Estimate the word's horizontal offset within the chunk from character counts
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)
                    # Approximate character width for Lato Bold at this size
                    char_width = 35
                    total_chunk_width = len(chunk_text) * char_width
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)
                    # X in pixels from the frame center (540); Y converted from the 0.75 frame-height fraction to pixels
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((540 + word_x_offset, int(subtitle_position * 1920)))
                    clips.append(highlighted_word)
            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue
    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    # 💥 Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])

shorts_generator2.py (new file)
@@ -0,0 +1,147 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)
    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))
    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)
    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")
    # Convert to vertical 9:16, cropping around the center of the resized frame
    vertical_clip = clip.resized(height=1920)
    vertical_clip = vertical_clip.cropped(width=1080, x_center=vertical_clip.w / 2)
    clips = [vertical_clip]
    subtitle_y_px = 1550  # Fixed Y position for subtitles
    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)
            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue
            words = text.strip().split()
            if not words:
                continue
            # Split into small readable chunks (max 2 words or ~25 characters)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)
            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)
                chunk_words = chunk_text.split()
                # Base subtitle
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)
                # Highlighted words, aligned against the measured width of the base subtitle
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is the center X of the 1080 px frame
                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size
                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x - 8, subtitle_y_px))
                    clips.append(highlighted_word)
                    current_x += word_width + 20  # Add spacing between words
            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue
    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)
    # Force close readers to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator2.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])