Initial commit: ShortGenerator project setup
- Add shorts_generator.py and shorts_generator2.py (main scripts)
- Add README.md with project documentation
- Add requirements.txt with Python dependencies
- Add .gitignore to exclude video files and other unnecessary files
commit 7536d7fb49
.gitignore · 61 lines · vendored · Normal file
@@ -0,0 +1,61 @@
# Video files (exclude large video files from version control)
*.mp4
*.avi
*.mov
*.mkv
*.wmv
*.flv

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# OS
.DS_Store
Thumbs.db

# Output folders
shorts/
output/
temp/

# Font zip files
*.zip

# Logs
*.log
README.md · 83 lines · Normal file
@@ -0,0 +1,83 @@
# ShortGenerator

A Python-based tool for automatically generating short-form videos from longer video content, optimized for platforms like TikTok, Instagram Reels, and YouTube Shorts.

## Features

- **Automatic loud moment detection**: Identifies exciting parts of videos using audio analysis
- **AI-powered transcription**: Uses Whisper for accurate speech-to-text conversion
- **Dynamic subtitles**: Creates engaging, word-by-word highlighted subtitles in the style of popular content creators
- **Vertical format optimization**: Automatically converts landscape videos to 9:16 aspect ratio
- **Batch processing**: Generate multiple shorts from a single video

## Requirements

- Python 3.8+
- FFmpeg
- Required Python packages (install with `pip install -r requirements.txt`):
  - moviepy
  - faster-whisper
  - numpy

## Installation

1. Clone the repository:
```bash
git clone https://git.ptbox.org/klop51/ShortGenerator.git
cd ShortGenerator
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Ensure FFmpeg is installed and accessible from your PATH (a quick check is sketched below).
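If you are not sure whether FFmpeg is reachable, a minimal check using only the Python standard library (this snippet is not part of the project) might look like:

```python
import shutil
import subprocess

# Look up the ffmpeg executable on PATH; returns None if not found.
ffmpeg_path = shutil.which("ffmpeg")
if ffmpeg_path is None:
    print("FFmpeg not found - install it and add it to your PATH.")
else:
    # Print the first line of `ffmpeg -version` to confirm it runs.
    out = subprocess.run([ffmpeg_path, "-version"], capture_output=True, text=True)
    print(out.stdout.splitlines()[0])
```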
## Usage

### Basic usage

```bash
python shorts_generator2.py your_video.mp4
```

This will:
1. Analyze your video for loud/exciting moments
2. Generate up to 3 short clips (5 seconds each by default)
3. Add dynamic subtitles with word-by-word highlighting
4. Save the shorts in the `shorts/` folder

### Customization

You can modify the script parameters (see the sketch below):
- `max_clips`: Number of shorts to generate
- `chunk_duration`: Length of each short clip
- `threshold_db`: Audio loudness threshold for moment detection
- `output_folder`: Where to save generated shorts
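As a sketch, a small driver script (the file name `make_shorts.py` is hypothetical, not part of the repository) could pass these parameters explicitly; the imported names match the functions defined in `shorts_generator2.py`:

```python
# make_shorts.py - illustrative sketch only.
from shorts_generator2 import detect_loud_moments, generate_shorts

# max_clips and output_folder are parameters of generate_shorts().
generate_shorts("your_video.mp4", max_clips=5, output_folder="my_shorts")

# chunk_duration and threshold_db belong to detect_loud_moments(); the
# generator currently calls it with threshold_db=-30, so to tune them
# either edit that call or run the detector directly:
moments = detect_loud_moments("your_video.mp4", chunk_duration=8, threshold_db=-25)
print(moments)  # list of (start, end) tuples in seconds
```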
## Output

Generated shorts are (see the verification sketch below):
- 1080x1920 resolution (9:16 aspect ratio)
- MP4 format with H.264 codec
- Subtitled with dynamic word-by-word highlighting effects
- Automatically cropped and centered from the original video
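A quick way to confirm a generated clip matches these specs, sketched with the project's own moviepy dependency (MoviePy 2.x import style assumed; `shorts/short_1.mp4` is just an example path):

```python
from moviepy import VideoFileClip

# Inspect a generated short's resolution and duration.
clip = VideoFileClip("shorts/short_1.mp4")
print(f"resolution: {clip.w}x{clip.h}")    # expected: 1080x1920
print(f"duration:   {clip.duration:.2f}s")
clip.close()
```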
## File Structure

```
ShortGenerator/
├── shorts_generator2.py   # Main script (latest version)
├── shorts_generator.py    # Legacy version
├── fonts/                 # Font files for subtitles
├── shorts/                # Generated short videos (gitignored)
└── README.md              # This file
```

## Contributing

Feel free to submit issues and enhancement requests!

## License

This project is open source. Please check the license file for details.
requirements.txt · 3 lines · Normal file
@@ -0,0 +1,3 @@
moviepy>=2.0  # the scripts use the MoviePy 2.x API (subclipped, resized, with_position)
faster-whisper>=0.10.0
numpy>=1.21.0
shorts_generator.py · 157 lines · Normal file
@@ -0,0 +1,157 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i+chunk_size]
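        # Convert mean sample magnitude to a decibel level; the 1e-10 guard
        # below avoids log10(0) on silent chunks. For example, a mean
        # magnitude of 0.1 maps to 20*log10(0.1) = -20 dB, which is why
        # callers pass negative thresholds such as -30.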
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    # Convert to vertical 9:16
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)

    clips = [vertical_clip]
    for (s, e, text) in subtitles:
        try:
            # Ensure subtitle timing doesn't exceed video duration
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)

            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            # Opus Clip style professional subtitles
            words = text.strip().split()
            if not words:
                continue

            # Break text into smaller chunks for better readability
            # (max 3 words or ~25 characters per line)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 3 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            # Position subtitles in the center-bottom area (Opus style)
            subtitle_position = 0.75

            # Create subtitle for each chunk with smooth transitions
            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)

            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)

                chunk_words = chunk_text.split()

                # Base subtitle with Opus-style design (bold white text with strong outline)
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold - excellent for subtitles
                    font_size=65,  # Larger, chunkier text
                    color='white',
                    stroke_color='black',
                    stroke_width=5  # Thicker outline for better readability
                )
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_position), relative=True)
                clips.append(base_subtitle)

                # Opus-style word-by-word highlighting (yellow/orange like Opus)
                word_duration = chunk_duration / len(chunk_words)

                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    # Opus-style highlighted word (vibrant yellow/orange)
                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',  # Lato Bold for consistency
                        font_size=68,  # Slightly larger for highlight effect
                        color='#FFD700',  # Gold/yellow like Opus Clip
                        stroke_color='#FF6B35',  # Orange outline for pop
                        stroke_width=5
                    )

                    # Estimate the word's horizontal offset within the chunk
                    words_before = chunk_words[:i]
                    chars_before = sum(len(w) for w in words_before) + len(words_before)

                    # Rough character-width estimate for Lato Bold at this size
                    char_width = 35  # Adjusted for larger, bolder font
                    total_chunk_width = len(chunk_text) * char_width
                    word_x_offset = (chars_before * char_width) - (total_chunk_width // 2)

                    # MoviePy's `relative` flag applies to the whole position
                    # tuple, so pass absolute pixels here (0.75 * 1920 = 1440)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((540 + word_x_offset, int(subtitle_position * 1920)))
                    clips.append(highlighted_word)

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    # 💥 Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)

    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])
shorts_generator2.py · 147 lines · Normal file
@@ -0,0 +1,147 @@
import os
import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)
    chunk_size = int(chunk_duration * 44100)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i+chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)
    clips = [vertical_clip]

    subtitle_y_px = 1550  # Fixed Y position for subtitles

    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)

            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            words = text.strip().split()
            if not words:
                continue

            # Split into small readable chunks (max 2 words or ~25 characters)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)

            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)

                chunk_words = chunk_text.split()

                # Base subtitle
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)

                # Highlighted words, aligned using each rendered word's
                # measured width instead of a character-width estimate
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is center X of 1080px width

                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size

                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x - 8, subtitle_y_px))
                    clips.append(highlighted_word)

                    current_x += word_width + 20  # Add spacing between words

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    # Force close to avoid Windows pipe errors
    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator2.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])