- Add shorts_generator.py and shorts_generator2.py (main scripts)
- Add README.md with project documentation
- Add requirements.txt with Python dependencies
- Add .gitignore to exclude video files and other unnecessary files
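requirements.txt itself is not shown in this view; a minimal sketch of what it likely lists, based on the imports in shorts_generator.py below (the MoviePy version bound is an assumption, inferred from the 2.x-style API the script uses):

    numpy
    moviepy>=2.0
    faster-whisper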
import os

import numpy as np
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
from faster_whisper import WhisperModel


def detect_loud_moments(video_path, chunk_duration=5, threshold_db=10):
    """Scan the audio track and return (start, end) windows whose mean level exceeds threshold_db."""
    print("🔍 Analyzing audio...")
    clip = VideoFileClip(video_path)
    audio = clip.audio.to_soundarray(fps=44100)
    volume = np.linalg.norm(audio, axis=1)  # per-sample magnitude across channels
    chunk_size = int(chunk_duration * 44100)

    loud_chunks = []
    max_db = -float('inf')
    for i in range(0, len(volume), chunk_size):
        chunk = volume[i:i + chunk_size]
        db = 20 * np.log10(np.mean(chunk) + 1e-10)
        max_db = max(max_db, db)
        if db > threshold_db:
            start = i / 44100
            loud_chunks.append((start, min(start + chunk_duration, clip.duration)))

    print(f"🔊 Max volume found: {max_db:.2f} dB, threshold: {threshold_db} dB")
    print(f"📈 Found {len(loud_chunks)} loud moments")
    clip.close()
    return loud_chunks


def transcribe_and_extract_subtitles(video_path, start, end):
    """Transcribe the video with faster-whisper and keep the segments that start inside [start, end]."""
    print(f"🗣️ Transcribing audio from {start:.2f}s to {end:.2f}s...")
    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(video_path, beam_size=5, language="en", vad_filter=True)

    subtitles = []
    for segment in segments:
        if start <= segment.start <= end:
            # Store times relative to the clip so they can be used directly as subtitle offsets
            subtitles.append((segment.start - start, segment.end - start, segment.text))
    return subtitles


def create_short_clip(video_path, start, end, subtitles, output_path):
    print(f"🎬 Creating short: {output_path}")
    clip = VideoFileClip(video_path).subclipped(start, end)
    video_duration = clip.duration
    print(f"📏 Video clip duration: {video_duration:.2f}s")

    # Convert to a vertical 1080x1920 frame centered on the original footage
    vertical_clip = clip.resized(height=1920).cropped(width=1080, x_center=clip.w / 2)
    clips = [vertical_clip]

    subtitle_y_px = 1550  # Fixed Y position for subtitles

    for (s, e, text) in subtitles:
        try:
            subtitle_start = max(0, s)
            subtitle_end = min(e, video_duration)

            if subtitle_start >= video_duration or subtitle_end <= subtitle_start:
                print(f"⚠️ Skipping subtitle outside video duration: {text[:30]}...")
                continue

            words = text.strip().split()
            if not words:
                continue

            # Split into small readable chunks (at most 2 words, or 1 word if the text runs past 25 characters)
            chunks = []
            current_chunk = []
            for word in words:
                current_chunk.append(word)
                if len(current_chunk) >= 2 or len(' '.join(current_chunk)) > 25:
                    chunks.append(' '.join(current_chunk))
                    current_chunk = []
            if current_chunk:
                chunks.append(' '.join(current_chunk))

            chunk_duration = (subtitle_end - subtitle_start) / len(chunks)

            for chunk_idx, chunk_text in enumerate(chunks):
                chunk_start = subtitle_start + (chunk_idx * chunk_duration)
                chunk_end = min(chunk_start + chunk_duration, subtitle_end)

                chunk_words = chunk_text.split()

                # Base subtitle: the whole chunk in white with a black outline
                base_subtitle = TextClip(
                    text=chunk_text.upper(),
                    font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                    font_size=65,
                    color='white',
                    stroke_color='black',
                    stroke_width=5
                )
                text_width, _ = base_subtitle.size
                base_subtitle = base_subtitle.with_start(chunk_start).with_end(chunk_end).with_position(('center', subtitle_y_px))
                clips.append(base_subtitle)

                # Highlighted words, overlaid on the base subtitle and timed word by word
                word_duration = chunk_duration / len(chunk_words)
                current_x = 540 - (text_width / 2)  # 540 is the center X of the 1080px-wide frame

                for i, word in enumerate(chunk_words):
                    word_start = chunk_start + (i * word_duration)
                    word_end = min(word_start + word_duration * 0.8, chunk_end)

                    highlighted_word = TextClip(
                        text=word.upper(),
                        font='C:/Windows/Fonts/LatoWeb-Bold.ttf',
                        font_size=68,
                        color='#FFD700',
                        stroke_color='#FF6B35',
                        stroke_width=5
                    )
                    word_width, _ = highlighted_word.size

                    word_x = current_x + (word_width / 2)
                    highlighted_word = highlighted_word.with_start(word_start).with_end(word_end).with_position((word_x - 8, subtitle_y_px))
                    clips.append(highlighted_word)

                    current_x += word_width + 20  # Add spacing between words

            print(f"✅ Added Opus-style subtitle ({subtitle_start:.1f}s-{subtitle_end:.1f}s): {text[:30]}...")
        except Exception as e:
            print(f"⚠️ Subtitle error: {e}, skipping subtitle: {text[:50]}...")
            continue

    final = CompositeVideoClip(clips, size=(1080, 1920))
    final.write_videofile(output_path, codec="libx264", audio_codec="aac", threads=1)

    clip.reader.close()
    if clip.audio:
        clip.audio.reader.close()
    final.close()


def generate_shorts(video_path, max_clips=3, output_folder="shorts"):
    os.makedirs(output_folder, exist_ok=True)
    best_moments = detect_loud_moments(video_path, threshold_db=-30)
    selected = best_moments[:max_clips]
    for i, (start, end) in enumerate(selected):
        subtitles = transcribe_and_extract_subtitles(video_path, start, end)
        out_path = os.path.join(output_folder, f"short_{i+1}.mp4")
        create_short_clip(video_path, start, end, subtitles, out_path)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python shorts_generator.py your_video.mp4")
    else:
        generate_shorts(sys.argv[1])