Files
blog-writer/bots/shorts/video_assembler.py
sinmb79 66be55ba8a fix(v3): code review 5개 이슈 수정
- korean_preprocessor: 발음 사전 176 → 206개 (200+ 달성)
- video_engine: SoraEngine 완전 제거 (2026-03-24 서비스 종료)
- smart_video_router: veo3/seedance2 빈 문자열 반환 → ffmpeg_slides 폴백
- cli/init: gemini_web 서비스 설정 질문 추가 (user_profile 일치)
- caption_renderer, tts_engine, video_assembler: --test 스탠드얼론 블록 추가

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 16:14:51 +09:00

682 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
bots/shorts/video_assembler.py
역할: 준비된 클립 + TTS 오디오 + ASS 자막 → 최종 쇼츠 MP4 조립
FFmpeg 전용 (CapCut 없음):
1. 각 클립을 오디오 길이에 맞게 비율 배분
2. xfade crossfade로 연결
3. ASS 자막 burn-in
4. TTS 오디오 합성 + BGM 덕킹
5. 페이드인/페이드아웃
6. 루프 최적화: 마지막 클립 = 첫 클립 복사 (리플레이 유도)
출력:
data/shorts/rendered/{timestamp}.mp4
"""
import json
import logging
import os
import subprocess
import tempfile
import wave
from pathlib import Path
from typing import Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Directory three levels above this file; the config file and the BGM path
# are resolved relative to it.
BASE_DIR = Path(__file__).parent.parent.parent
def _load_config() -> dict:
    """Read ``config/shorts_config.json`` located under ``BASE_DIR``.

    Returns:
        The parsed JSON document, or an empty dict when the file is absent.
    """
    config_file = BASE_DIR / 'config' / 'shorts_config.json'
    if not config_file.exists():
        return {}
    raw_text = config_file.read_text(encoding='utf-8')
    return json.loads(raw_text)
def _get_ffmpeg() -> str:
    """Pick the FFmpeg executable to invoke.

    Returns:
        The value of the ``FFMPEG_PATH`` environment variable when it is set
        and points at an existing path; otherwise the bare name ``'ffmpeg'``.
    """
    candidate = os.environ.get('FFMPEG_PATH', '')
    if not candidate:
        return 'ffmpeg'
    return candidate if Path(candidate).exists() else 'ffmpeg'
def _get_wav_duration(wav_path: Path) -> float:
    """Return the length of an audio file in seconds.

    The file is first read with the ``wave`` module (frame count divided by
    frame rate). If that fails for any reason, ``ffprobe`` is asked for the
    container duration. If that fails too, 20.0 is returned.
    """
    try:
        with wave.open(str(wav_path), 'rb') as reader:
            frame_total = reader.getnframes()
            sample_rate = reader.getframerate()
            return frame_total / sample_rate
    except Exception:
        # Not readable through the wave module: fall through to ffprobe.
        pass

    probe_args = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
    ]
    try:
        completed = subprocess.run(
            [*probe_args, str(wav_path)],
            capture_output=True, text=True, timeout=10,
        )
        return float(completed.stdout.strip())
    except Exception:
        # Last-resort default used when neither method yields a duration.
        return 20.0
def _get_video_duration(video_path: Path) -> float:
    """Return the container duration of ``video_path`` in seconds via ``ffprobe``.

    Any failure (ffprobe missing, timeout, unparsable output) yields the
    default of 6.0 instead of raising.
    """
    probe_args = [
        'ffprobe', '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1',
    ]
    try:
        completed = subprocess.run(
            [*probe_args, str(video_path)],
            capture_output=True, text=True, timeout=10,
        )
        seconds = float(completed.stdout.strip())
    except Exception:
        return 6.0
    return seconds
# ─── 클립 연결 ────────────────────────────────────────────────
def _trim_clip(src: Path, dst: Path, duration: float, ffmpeg: str) -> bool:
    """Re-encode ``src`` into ``dst``, keeping only the first ``duration`` seconds.

    The output is encoded with ``libx264 -crf 23 -preset fast`` at 30 fps and
    carries no audio track (``-an``).

    Args:
        src: Input clip.
        dst: Output path (overwritten if present).
        duration: Length to keep, in seconds.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True when FFmpeg exits with status 0. False when it exits non-zero,
        exceeds the 120 s timeout, or cannot be started at all.
    """
    cmd = [
        ffmpeg, '-y', '-i', str(src),
        '-t', f'{duration:.3f}',
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        '-an', '-r', '30',
        str(dst),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=120)
    except subprocess.CalledProcessError as e:
        logger.warning(f'트리밍 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # A timeout or a missing/unexecutable binary is also a failed trim;
        # report it through the return value instead of letting it escape.
        logger.warning(f'트리밍 실패: {e}')
        return False
    return True
def _concat_with_xfade(clips: list[Path], output: Path, crossfade: float, ffmpeg: str) -> bool:
    """Join clips into ``output`` with a fade ``xfade`` transition between neighbours.

    A single clip is copied as is. For two or more clips a chained
    filtergraph is built: ``[0][1]xfade -> [xf1]``, ``[xf1][2]xfade -> [xf2]``
    and so on. If the xfade run fails, the clips are joined without
    transitions via ``_concat_simple``.

    Args:
        clips: Clips in playback order.
        output: Destination file.
        crossfade: Transition length in seconds.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True on success (including the fallback path), False otherwise.
        An empty ``clips`` list returns False without invoking FFmpeg.
    """
    if not clips:
        # Nothing to join; avoid launching FFmpeg with an empty filtergraph.
        logger.warning('xfade 연결 실패: 클립 없음')
        return False
    if len(clips) == 1:
        import shutil
        shutil.copy2(str(clips[0]), str(output))
        return True

    # Duration of every clip, needed to place each transition.
    durations = [_get_video_duration(c) for c in clips]

    inputs: list[str] = []
    for c in clips:
        inputs += ['-i', str(c)]

    # Each transition starts where the running output ends minus the overlap,
    # so the offset grows by (previous clip length - crossfade) per step.
    filter_parts = []
    offset = 0.0
    prev_label = '[0:v]'
    for i, prev_dur in enumerate(durations[:-1], start=1):
        offset += prev_dur - crossfade
        out_label = f'[xf{i}]'
        filter_parts.append(
            f'{prev_label}[{i}:v]xfade=transition=fade:duration={crossfade}:offset={offset:.3f}{out_label}'
        )
        prev_label = out_label
    filter_complex = ';'.join(filter_parts)

    cmd = [
        ffmpeg, '-y',
        *inputs,
        '-filter_complex', filter_complex,
        '-map', prev_label,
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        '-an', '-r', '30',
        str(output),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=300)
        return True
    except subprocess.CalledProcessError as e:
        logger.warning(f'xfade 연결 실패: {e.stderr.decode(errors="ignore")[:300]}')
    except (subprocess.TimeoutExpired, OSError) as e:
        # Timeouts and launch failures take the same fallback as a bad exit code.
        logger.warning(f'xfade 연결 실패: {e}')
    # Fallback: plain concat without transitions.
    return _concat_simple(clips, output, ffmpeg)
def _concat_simple(clips: list[Path], output: Path, ffmpeg: str) -> bool:
    """Join clips without transitions using FFmpeg's concat demuxer (fallback path).

    A temporary ``concat_list.txt`` is written next to ``output`` and removed
    again whether or not the run succeeds.

    Args:
        clips: Clips in playback order.
        output: Destination file.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True when FFmpeg exits with status 0; False on a non-zero exit,
        a timeout (300 s), or an OS-level failure.
    """
    list_file = output.parent / 'concat_list.txt'
    entries = []
    for clip in clips:
        # The concat demuxer resolves relative entries against the list
        # file's directory, not the current working directory, so write
        # absolute paths.
        absolute = clip.resolve().as_posix()
        # Inside a single-quoted entry a literal quote is written as '\''.
        escaped = absolute.replace("'", "'\\''")
        entries.append(f"file '{escaped}'")

    cmd = [
        ffmpeg, '-y',
        '-f', 'concat', '-safe', '0',
        '-i', str(list_file),
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        '-an', '-r', '30',
        str(output),
    ]
    try:
        list_file.write_text('\n'.join(entries), encoding='utf-8')
        subprocess.run(cmd, check=True, capture_output=True, timeout=300)
        return True
    except subprocess.CalledProcessError as e:
        logger.error(f'단순 concat 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.error(f'단순 concat 실패: {e}')
        return False
    finally:
        # Single cleanup point so the list file never outlives the call.
        list_file.unlink(missing_ok=True)
# ─── 오디오 합성 ─────────────────────────────────────────────
def _mix_audio(tts_wav: Path, bgm_path: Optional[Path], bgm_db: float,
               total_dur: float, output: Path, ffmpeg: str) -> bool:
    """Encode the narration to AAC, mixing in background music when available.

    When ``bgm_path`` exists, the BGM is looped, attenuated by a fixed
    ``bgm_db`` gain, trimmed to ``total_dur`` and mixed under the narration
    (``amix`` with ``duration=first``). Otherwise only the narration is encoded.

    Args:
        tts_wav: Narration audio.
        bgm_path: Optional background-music file.
        bgm_db: Gain applied to the BGM, in dB.
        total_dur: Maximum output length in seconds (passed as ``-t``).
        output: Destination audio file.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True when FFmpeg exits with status 0; False on a non-zero exit,
        a timeout (120 s), or when the executable cannot be started.
    """
    if bgm_path and bgm_path.exists():
        cmd = [
            ffmpeg, '-y',
            '-i', str(tts_wav),
            '-stream_loop', '-1', '-i', str(bgm_path),
            '-filter_complex', (
                f'[1:a]volume={bgm_db}dB,atrim=0:{total_dur:.3f}[bgm];'
                f'[0:a][bgm]amix=inputs=2:duration=first[aout]'
            ),
            '-map', '[aout]',
            '-c:a', 'aac', '-b:a', '192k',
            '-t', f'{total_dur:.3f}',
            str(output),
        ]
    else:
        cmd = [
            ffmpeg, '-y',
            '-i', str(tts_wav),
            '-c:a', 'aac', '-b:a', '192k',
            '-t', f'{total_dur:.3f}',
            str(output),
        ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=120)
    except subprocess.CalledProcessError as e:
        logger.warning(f'오디오 혼합 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # Keep the bool contract for timeouts and launch failures too.
        logger.warning(f'오디오 혼합 실패: {e}')
        return False
    return True
# ─── 최종 합성 ────────────────────────────────────────────────
def _assemble_final(
    video: Path, audio: Path, ass_path: Optional[Path],
    output: Path, fade_in: float, fade_out: float,
    total_dur: float, cfg: dict, ffmpeg: str,
) -> bool:
    """Mux video and audio into the final MP4, optionally burning in ASS subtitles.

    Video and audio both get a fade-in at 0 s and a fade-out that starts at
    ``total_dur - fade_out``. The audio is then padded with 0.2 s of silence
    (loop optimisation) and the output ends with the shorter stream
    (``-shortest``).

    Args:
        video: Concatenated, silent video.
        audio: Mixed audio track.
        ass_path: ASS subtitle file; skipped when None or missing.
        output: Destination MP4.
        fade_in: Fade-in length in seconds.
        fade_out: Fade-out length in seconds.
        total_dur: Reference duration used to place the fade-out.
        cfg: Config dict; ``cfg['video']`` may override crf, codec,
            audio_codec, audio_bitrate and fps.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True when FFmpeg exits with status 0; False on a non-zero exit,
        a timeout (600 s), or when the executable cannot be started.
    """
    vid_cfg = cfg.get('video', {})
    crf = vid_cfg.get('crf', 18)
    codec = vid_cfg.get('codec', 'libx264')
    audio_codec = vid_cfg.get('audio_codec', 'aac')
    audio_bitrate = vid_cfg.get('audio_bitrate', '192k')

    # Never schedule the fade-out before the start of the stream.
    fade_out_start = max(0.0, total_dur - fade_out)
    fade_filter = (
        f'fade=t=in:st=0:d={fade_in},'
        f'fade=t=out:st={fade_out_start:.3f}:d={fade_out}'
    )

    # ASS subtitle burn-in.
    if ass_path and ass_path.exists():
        # Filter strings are unescaped twice. First escape for the filter's
        # own option parser, where backslash, ':' and quote are special ...
        ass_value = (
            ass_path.as_posix()
            .replace('\\', '\\\\')
            .replace(':', '\\:')
            .replace("'", "\\'")
        )
        # ... then single-quote the value for the filtergraph parser so that
        # ',', ';', '[' and ']' in the path are taken literally. A quote
        # inside a quoted section has to be written as '\''.
        ass_quoted = "'" + ass_value.replace("'", "'\\''") + "'"
        vf = f'{fade_filter},ass={ass_quoted}'
    else:
        vf = fade_filter

    cmd = [
        ffmpeg, '-y',
        '-i', str(video),
        '-i', str(audio),
        '-vf', vf,
        '-af', (
            f'afade=t=in:st=0:d={fade_in},'
            f'afade=t=out:st={fade_out_start:.3f}:d={fade_out},'
            f'apad=pad_dur=0.2'  # loop optimisation: 0.2 s of trailing silence
        ),
        '-c:v', codec, '-crf', str(crf), '-preset', 'medium',
        '-c:a', audio_codec, '-b:a', audio_bitrate,
        '-r', str(vid_cfg.get('fps', 30)),
        '-shortest',
        str(output),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=600)
    except subprocess.CalledProcessError as e:
        logger.error(f'최종 합성 실패: {e.stderr.decode(errors="ignore")[:400]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # Keep the bool contract for timeouts and launch failures too.
        logger.error(f'최종 합성 실패: {e}')
        return False
    return True
# ─── 파일 크기 체크 ──────────────────────────────────────────
def _check_filesize(path: Path, max_mb: int = 50) -> bool:
    """Log the size of ``path`` in MiB and report whether it is within ``max_mb``.

    Returns:
        True when the file size is less than or equal to ``max_mb`` MiB.
    """
    bytes_per_mb = 1024 * 1024
    byte_count = path.stat().st_size
    size_mb = byte_count / bytes_per_mb
    logger.info(f'출력 파일 크기: {size_mb:.1f}MB')
    if size_mb <= max_mb:
        return True
    return False
def _rerender_smaller(src: Path, dst: Path, ffmpeg: str) -> bool:
    """Re-encode ``src`` into ``dst`` at CRF 23 with 128k AAC to shrink the file.

    Args:
        src: Oversized input file.
        dst: Destination for the smaller encode.
        ffmpeg: FFmpeg executable name or path.

    Returns:
        True when FFmpeg exits with status 0; False on a non-zero exit,
        a timeout (600 s), or when the executable cannot be started.
    """
    cmd = [
        ffmpeg, '-y', '-i', str(src),
        '-c:v', 'libx264', '-crf', '23', '-preset', 'medium',
        '-c:a', 'aac', '-b:a', '128k',
        str(dst),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=600)
    except subprocess.CalledProcessError as e:
        logger.error(f'재인코딩 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # Keep the bool contract for timeouts and launch failures too.
        logger.error(f'재인코딩 실패: {e}')
        return False
    return True
# ─── 메인 엔트리포인트 ────────────────────────────────────────
def assemble(
    clips: list[Path],
    tts_wav: Path,
    ass_path: Optional[Path],
    output_dir: Path,
    timestamp: str,
    cfg: Optional[dict] = None,
    work_dir: Optional[Path] = None,
) -> Path:
    """Assemble clips, TTS audio and optional subtitles into the final Shorts MP4.

    Args:
        clips: Prepared clip paths, in playback order.
        tts_wav: Narration audio (WAV).
        ass_path: ASS subtitle file, or None for no subtitles.
        output_dir: Directory that receives ``{timestamp}.mp4``.
        timestamp: File-name stem of the output.
        cfg: Config dict; loaded via ``_load_config()`` when None.
        work_dir: Scratch directory. When None, ``output_dir/_work_{timestamp}``
            is created and removed again at the end; a caller-supplied
            directory is left in place.

    Returns:
        Path of the rendered MP4.

    Raises:
        RuntimeError: The narration is shorter than 10 s or longer than 65 s,
            there are no clips, a pipeline stage fails, or the final video is
            shorter than 10 s.
    """
    # shutil is not among the module-level imports, so import it locally.
    import shutil

    if cfg is None:
        cfg = _load_config()
    output_dir.mkdir(parents=True, exist_ok=True)
    ffmpeg = _get_ffmpeg()
    vid_cfg = cfg.get('video', {})
    crossfade = vid_cfg.get('crossfade_sec', 0.3)
    fade_in = vid_cfg.get('fade_in_sec', 0.5)
    fade_out = vid_cfg.get('fade_out_sec', 0.5)
    bgm_path_str = vid_cfg.get('bgm_path', '')
    bgm_db = vid_cfg.get('bgm_volume_db', -18)
    bgm_path = BASE_DIR / bgm_path_str if bgm_path_str else None

    audio_dur = _get_wav_duration(tts_wav)
    logger.info(f'TTS 길이: {audio_dur:.1f}')

    # Quality gate: the narration must be between 10 and 65 seconds long.
    if audio_dur < 10:
        raise RuntimeError(f'TTS 길이 너무 짧음: {audio_dur:.1f}초 (최소 10초)')
    if audio_dur > 65:
        raise RuntimeError(f'TTS 길이 너무 김: {audio_dur:.1f}초 (최대 65초)')
    if not clips:
        raise RuntimeError('클립 없음 — 조립 불가')

    # Scratch directory: only clean up what this call created itself.
    tmp_cleanup = work_dir is None
    if work_dir is None:
        work_dir = output_dir / f'_work_{timestamp}'
    work_dir.mkdir(parents=True, exist_ok=True)

    try:
        # ── Loop optimisation: append up to 2 s of the first clip at the end ──
        loop_clips = list(clips)
        if len(clips) > 1:
            loop_clip = work_dir / 'loop_clip.mp4'
            if _trim_clip(clips[0], loop_clip, min(2.0, _get_video_duration(clips[0])), ffmpeg):
                loop_clips.append(loop_clip)

        # ── Per-clip duration: even share of the target length, kept within 3-8 s ──
        # NOTE(review): each clip is capped at 8 s and _assemble_final muxes with
        # -shortest, so with few clips the video may end before the narration
        # does — confirm this is intended.
        total_clip_dur = audio_dur + fade_in + fade_out
        n = len(loop_clips)
        base_dur = total_clip_dur / n
        clip_dur = max(3.0, min(base_dur, 8.0))

        # Trim every clip; a clip shorter than its share is used at full length.
        trimmed = []
        for i, clip in enumerate(loop_clips):
            t = work_dir / f'trimmed_{i:02d}.mp4'
            actual_dur = min(clip_dur, _get_video_duration(clip))
            if _trim_clip(clip, t, actual_dur, ffmpeg):
                trimmed.append(t)
            else:
                logger.warning(f'클립 {i} 트리밍 실패 — 건너뜀')
        if not trimmed:
            raise RuntimeError('트리밍된 클립 없음')

        # ── Join clips ──
        concat_out = work_dir / 'concat.mp4'
        if not _concat_with_xfade(trimmed, concat_out, crossfade, ffmpeg):
            raise RuntimeError('클립 연결 실패')

        # ── Mix audio ──
        audio_out = work_dir / 'audio_mixed.aac'
        if not _mix_audio(tts_wav, bgm_path, bgm_db, audio_dur + 0.2, audio_out, ffmpeg):
            # Mixing failed: fall back to the raw narration file.
            audio_out = tts_wav

        # ── Final mux ──
        final_out = output_dir / f'{timestamp}.mp4'
        if not _assemble_final(
            concat_out, audio_out, ass_path,
            final_out, fade_in, fade_out, audio_dur,
            cfg, ffmpeg,
        ):
            raise RuntimeError('최종 합성 실패')

        # ── File-size gate ──
        if not _check_filesize(final_out, max_mb=50):
            logger.warning('파일 크기 초과 (>50MB) — CRF 23으로 재인코딩')
            rerender_out = output_dir / f'{timestamp}_small.mp4'
            if _rerender_smaller(final_out, rerender_out, ffmpeg):
                # replace() overwrites the target in one step, so there is no
                # window in which neither file exists.
                rerender_out.replace(final_out)
            else:
                # Do not leave a partial re-encode next to the real output.
                rerender_out.unlink(missing_ok=True)

        # ── Final duration check ──
        final_dur = _get_video_duration(final_out)
        if final_dur < 10:
            raise RuntimeError(f'최종 영상 길이 너무 짧음: {final_dur:.1f}')
        if final_dur > 65:
            logger.warning(f'최종 영상 길이 초과: {final_dur:.1f}초 (YouTube Shorts 제한 60초)')
        logger.info(f'쇼츠 조립 완료: {final_out.name} ({final_dur:.1f}초)')
        return final_out
    finally:
        if tmp_cleanup and work_dir.exists():
            shutil.rmtree(work_dir, ignore_errors=True)
# ─── GPU Encoder Detection ────────────────────────────────────
def _detect_gpu_encoder(ffmpeg: str = 'ffmpeg') -> str:
    """Find a usable H.264 encoder by test-encoding a tiny synthetic clip.

    Encoders are tried in priority order:
    ``h264_nvenc`` (NVIDIA) > ``h264_amf`` (AMD) > ``h264_qsv`` (Intel).
    The first one for which FFmpeg exits with status 0 wins.

    Args:
        ffmpeg: FFmpeg executable name or path.

    Returns:
        The encoder name, or ``'libx264'`` (CPU) when no hardware encoder
        works or FFmpeg cannot be run.
    """
    encoders_to_try = [
        ('h264_nvenc', ['-hwaccel', 'cuda']),  # NVIDIA
        ('h264_amf', []),                       # AMD
        ('h264_qsv', ['-hwaccel', 'qsv']),      # Intel
    ]
    for encoder, hwaccel_args in encoders_to_try:
        test_out = None
        try:
            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as f:
                test_out = f.name
            cmd = (
                [ffmpeg, '-y', '-loglevel', 'error']
                + hwaccel_args
                # 256x256 rather than a minimal frame: hardware encoders can
                # reject very small frames (NVENC enforces a minimum frame
                # size), which would make a working GPU look unavailable.
                + ['-f', 'lavfi', '-i', 'color=black:s=256x256:r=1',
                   '-t', '0.1',
                   '-c:v', encoder,
                   test_out]
            )
            result = subprocess.run(cmd, capture_output=True, timeout=10)
            if result.returncode == 0:
                logger.info(f'[GPU] 인코더 감지: {encoder}')
                return encoder
        except (subprocess.SubprocessError, OSError, ValueError) as exc:
            # Best-effort probe: a timeout or launch failure only means this
            # encoder is unusable, so move on to the next candidate.
            logger.debug(f'[GPU] {encoder} 확인 실패: {exc}')
        finally:
            # Always remove the probe output, including when run() raised.
            if test_out is not None:
                try:
                    Path(test_out).unlink(missing_ok=True)
                except OSError:
                    pass
    logger.info('[GPU] GPU 인코더 없음 — libx264 사용')
    return 'libx264'
# ─── Resilient Assembler ─────────────────────────────────────
class ResilientAssembler:
    """Fault-tolerant front end for the module-level ``assemble()``.

    Adds on top of ``assemble()``:
    1. Per-clip encoding (if one clip fails, only that clip falls back)
    2. A timeout per FFmpeg process (5 minutes per clip)
    3. GPU encoder auto-detection (nvenc/amf/qsv/cpu)
    4. Progress reporting (one log line per clip)

    Use ``assemble_resilient()`` instead of the module-level ``assemble()``
    for better fault tolerance.
    """

    CLIP_TIMEOUT = 300   # 5 minutes per clip
    FINAL_TIMEOUT = 600  # 10 minutes for final assembly

    def __init__(self, cfg: Optional[dict] = None):
        """Create an assembler.

        Args:
            cfg: shorts_config.json dict. Loaded via ``_load_config()`` only
                when None, so an explicitly passed dict (even an empty one)
                is respected, matching ``assemble()``.
        """
        self._cfg = _load_config() if cfg is None else cfg
        self._ffmpeg = _get_ffmpeg()
        self._encoder: Optional[str] = None  # detected lazily on first use

    def _get_encoder(self) -> str:
        """Return the detected video encoder, probing only on the first call."""
        if self._encoder is None:
            self._encoder = _detect_gpu_encoder(self._ffmpeg)
        return self._encoder

    def _encode_clip(self, clip_path: Path, index: int, work_dir: Path) -> Path:
        """Encode one clip to the standard format (yuv420p, 30 fps, no audio).

        Args:
            clip_path: Source clip.
            index: Clip position, used for the output name and log messages.
            work_dir: Directory that receives ``encoded_{index:02d}.mp4``.

        Returns:
            Path to the encoded clip.

        Raises:
            RuntimeError: FFmpeg exited non-zero, timed out, or could not be
                started. The caller reacts by using the fallback encoder.
        """
        out = work_dir / f'encoded_{index:02d}.mp4'
        encoder = self._get_encoder()
        if encoder == 'libx264':
            quality_args = ['-crf', '20', '-preset', 'fast']
        else:
            # GPU encoders use different quality flags; use a bitrate target.
            quality_args = ['-b:v', '2M']
        cmd = [
            self._ffmpeg, '-y',
            '-i', str(clip_path),
            '-c:v', encoder,
            *quality_args,
            '-pix_fmt', 'yuv420p',
            '-an', '-r', '30',
            str(out),
        ]
        try:
            result = subprocess.run(
                cmd, capture_output=True, timeout=self.CLIP_TIMEOUT
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(f'클립 {index} 인코딩 타임아웃 ({self.CLIP_TIMEOUT}초)') from exc
        except OSError as exc:
            # A missing or unexecutable FFmpeg must surface as the documented
            # RuntimeError rather than a raw OSError.
            raise RuntimeError(f'FFmpeg error: {exc}') from exc
        if result.returncode != 0:
            raise RuntimeError(f'FFmpeg error: {result.stderr.decode(errors="ignore")[-200:]}')
        logger.info(f'[조립] 클립 {index} 인코딩 완료 ({encoder})')
        return out

    def _fallback_clip(self, clip_path: Path, index: int, work_dir: Path) -> Path:
        """Re-encode one clip with libx264 (CPU) after the primary encode failed.

        Returns:
            Path to ``fallback_{index:02d}.mp4`` on success; the original
            ``clip_path`` as a last resort when this encode fails as well.
        """
        logger.warning(f'[조립] 클립 {index} 폴백 인코딩 (libx264)')
        out = work_dir / f'fallback_{index:02d}.mp4'
        cmd = [
            self._ffmpeg, '-y',
            '-i', str(clip_path),
            '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
            '-pix_fmt', 'yuv420p',
            '-an', '-r', '30',
            str(out),
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, timeout=self.CLIP_TIMEOUT)
        except subprocess.TimeoutExpired:
            logger.error(f'[조립] 폴백 타임아웃 (클립 {index})')
            return clip_path
        except OSError as exc:
            logger.error(f'[조립] 폴백도 실패 (클립 {index}): {exc}')
            return clip_path
        if result.returncode != 0:
            logger.error(f'[조립] 폴백도 실패 (클립 {index}): {result.stderr.decode(errors="ignore")[-100:]}')
            return clip_path  # hand back the original as a last resort
        return out

    def assemble_resilient(
        self,
        clips: list[Path],
        tts_wav: Path,
        ass_path: Optional[Path],
        output_dir: Path,
        timestamp: str,
        work_dir: Optional[Path] = None,
    ) -> Path:
        """Resilient version of ``assemble()`` with per-clip fallback.

        Key differences from ``assemble()``:
        1. Each clip is encoded individually; a failure falls back for that clip only
        2. A GPU encoder is used when available
        3. Per-process timeout (5 min per clip)
        4. Progress is logged per clip

        Args:
            clips, tts_wav, ass_path, output_dir, timestamp: Same as ``assemble()``.
            work_dir: Scratch directory. When None,
                ``output_dir/_resilient_{timestamp}`` is created and removed
                again at the end.

        Returns:
            Path to the rendered MP4.

        Raises:
            RuntimeError: No clips were supplied, or ``assemble()`` failed.
        """
        # shutil is not among the module-level imports, so import it locally.
        import shutil

        output_dir.mkdir(parents=True, exist_ok=True)
        tmp_cleanup = work_dir is None
        if work_dir is None:
            work_dir = output_dir / f'_resilient_{timestamp}'
        work_dir.mkdir(parents=True, exist_ok=True)
        try:
            # Step 1: encode each clip, falling back per clip on failure.
            encoded = []
            failed_count = 0
            for i, clip in enumerate(clips):
                logger.info(f'[조립] 클립 {i+1}/{len(clips)} 처리 중...')
                try:
                    encoded.append(self._encode_clip(clip, i, work_dir))
                except Exception as e:
                    logger.warning(f'[조립] 클립 {i} 인코딩 실패: {e} — 폴백 사용')
                    failed_count += 1
                    encoded.append(self._fallback_clip(clip, i, work_dir))
            if not encoded:
                raise RuntimeError('[조립] 인코딩된 클립 없음 — 조립 불가')
            if failed_count > 0:
                logger.warning(f'[조립] {failed_count}/{len(clips)} 클립이 폴백으로 인코딩됨')

            # Step 2: hand the normalised clips to assemble() for
            # concat + audio + subtitles, in a sub-directory of our scratch dir.
            result_path = assemble(
                clips=encoded,
                tts_wav=tts_wav,
                ass_path=ass_path,
                output_dir=output_dir,
                timestamp=timestamp,
                cfg=self._cfg,
                work_dir=work_dir / 'assemble',
            )
            logger.info(f'[조립] 탄력적 조립 완료: {result_path.name}')
            return result_path
        finally:
            if tmp_cleanup and work_dir.exists():
                shutil.rmtree(work_dir, ignore_errors=True)
# ── Standalone test ──────────────────────────────────────────────
if __name__ == '__main__':
    # Standalone self-test: run with --test to check encoder detection,
    # encoder caching and the WAV duration helper.
    import sys

    if '--test' not in sys.argv:
        print("사용법: python -m bots.shorts.video_assembler --test")
        sys.exit(0)
    print("=== Video Assembler Test ===")

    # [1] GPU encoder detection must return one of the known encoder names.
    print("\n[1] GPU 인코더 자동 감지:")
    ffmpeg_bin = _get_ffmpeg()
    encoder = _detect_gpu_encoder(ffmpeg_bin)
    print(f" 감지된 인코더: {encoder}")
    assert encoder in ('h264_nvenc', 'h264_amf', 'h264_qsv', 'libx264'), \
        f"알 수 없는 인코더: {encoder}"

    # [2] The assembler must cache the detected encoder between calls.
    print("\n[2] ResilientAssembler 초기화 + 인코더 캐싱:")
    assembler = ResilientAssembler()
    enc1 = assembler._get_encoder()
    enc2 = assembler._get_encoder()
    print(f" 인코더: {enc1}")
    assert enc1 == enc2, "캐시 불일치"
    assert assembler._encoder is not None, "캐시 저장 실패"

    # [3] Duration helper, checked against a generated 1 s WAV file.
    print("\n[3] 유틸 함수:")
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            tmp_path = Path(tmp.name)
        # Minimal valid WAV: 1 s of silence, 44100 Hz, mono, 16-bit.
        with wave.open(str(tmp_path), 'w') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(44100)
            wf.writeframes(b'\x00\x00' * 44100)
    except (OSError, wave.Error) as e:
        # Only a failure to create the fixture counts as a skip; a wrong
        # duration below must fail the run instead of being swallowed here.
        print(f" [경고] WAV 테스트 건너뜀: {e}")
    else:
        dur = _get_wav_duration(tmp_path)
        print(f" WAV 1초 테스트: duration={dur:.2f}s")
        assert abs(dur - 1.0) < 0.1, f"WAV 길이 오류: {dur}"
    finally:
        # Remove the fixture even when the assertion fails.
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
    print("\n✅ 모든 테스트 통과")