Files
blog-writer/bots/shorts/stock_fetcher.py
JOUNGWOOK KWON 29cdeb2adf fix: Android 코덱 호환성 + 스톡영상 스크린녹화 필터링 + Gitea URL 업데이트
- video_assembler: yuv420p, profile high, level 4.0, movflags faststart 추가
- stock_fetcher: AI/UI 키워드 실사영상으로 변환, 스크린녹화 태그 차단
- CLAUDE.md: Gitea URL https://gitea.gru.farm/ 으로 변경

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-07 21:24:47 +09:00

463 lines
17 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
bots/shorts/stock_fetcher.py
역할: 스크립트 keywords → 스톡 영상 클립 다운로드 (Pexels → Pixabay → 이미지 폴백)
캐릭터 오버레이:
manifest.character_overlay.enabled = true 이면
캐릭터 PNG를 각 클립 우하단에 FFmpeg overlay로 합성.
출력:
data/shorts/clips/{timestamp}/clip_N.mp4
"""
import json
import logging
import os
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from typing import Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Directory three levels above this file; config/shorts_config.json is looked up under it.
BASE_DIR = Path(__file__).parent.parent.parent
# Stock-video search endpoints.
PEXELS_VIDEO_URL = 'https://api.pexels.com/videos/search'
PIXABAY_VIDEO_URL = 'https://pixabay.com/api/videos/'
# Tags that mark screen-recording / UI / text footage; used to exclude such stock hits.
_SCREEN_BLOCK_TAGS = {
'screen recording', 'screenshot', 'tutorial', 'demo', 'interface',
'typing', 'chatgpt', 'chatbot', 'website', 'browser', 'desktop',
'laptop screen', 'phone screen', 'app', 'software', 'code', 'coding',
'monitor', 'computer screen', 'ui', 'ux', 'dashboard',
}
# Keywords to keep out of search queries (they tend to surface screen recordings).
_SEARCH_EXCLUDE = {
'chatgpt', 'ai chat', 'gpt', 'openai', 'claude', 'gemini',
'software', 'app', 'website', 'browser', 'code',
}
def _sanitize_keyword(keyword: str) -> str:
    """Replace a screen-recording-prone search keyword with a live-footage one.

    AI/tech terms such as 'chatgpt' or 'app' tend to return screen captures
    from stock sites, so a keyword containing one of them is swapped for a
    generic real-footage query. Matching is case-insensitive and a term only
    counts when it starts a word, so 'happy' or 'decode' are no longer
    mistaken for 'app' / 'code', while 'apps' or 'gpt-4' still match.

    Args:
        keyword: Raw search keyword taken from the script.

    Returns:
        The replacement query for the first matching term, or ``keyword``
        unchanged when no term matches.
    """
    import re  # local import: the module header does not import ``re``

    # Ordered table: the first matching entry wins, so 'chatgpt' must stay
    # ahead of 'gpt'.
    replacements = {
        'chatgpt': 'futuristic technology',
        'ai chat': 'artificial intelligence robot',
        'gpt': 'digital innovation',
        'openai': 'technology innovation',
        'claude': 'digital brain',
        'gemini': 'space stars',
        'software': 'digital technology',
        'app': 'smartphone lifestyle',
        'website': 'modern office',
        'browser': 'modern workspace',
        'code': 'digital network',
    }
    kw_lower = keyword.lower()
    for term, substitute in replacements.items():
        # The look-behind rejects hits in the middle of an ASCII word
        # ('h|app|y'); digits, punctuation and non-ASCII text may precede.
        if re.search(rf'(?<![a-z]){re.escape(term)}', kw_lower):
            return substitute
    return keyword
def _load_config() -> dict:
    """Read ``config/shorts_config.json`` under ``BASE_DIR``.

    Returns:
        The parsed JSON content, or an empty dict when the file is absent.
    """
    config_file = BASE_DIR / 'config' / 'shorts_config.json'
    if not config_file.exists():
        return {}
    raw_text = config_file.read_text(encoding='utf-8')
    return json.loads(raw_text)
def _get_ffmpeg() -> str:
    """Return the FFmpeg executable to invoke.

    Uses the ``FFMPEG_PATH`` environment variable when it is set and points
    at an existing path; otherwise returns the bare command name ``'ffmpeg'``.
    """
    candidate = os.environ.get('FFMPEG_PATH', '')
    usable = bool(candidate) and Path(candidate).exists()
    return candidate if usable else 'ffmpeg'
# ─── Pexels ──────────────────────────────────────────────────
def _pick_pexels_file(video_files: list, prefer_vertical: bool = True) -> Optional[dict]:
    """Pick one HD/SD MP4 rendition from a Pexels ``video_files`` list.

    The first eligible rendition is taken; when ``prefer_vertical`` is set,
    any later eligible portrait rendition (height > width) replaces it.

    Returns:
        The chosen rendition dict, or None when nothing is eligible.
    """
    import urllib.parse

    chosen = None
    for vf in video_files:
        if vf.get('quality') not in ('hd', 'sd'):
            continue
        link = vf.get('link') or ''
        # Test the URL path, not the raw string: links may carry a query
        # string ('...hd.mp4?s=...') that a plain endswith() would reject.
        if not urllib.parse.urlsplit(link).path.lower().endswith('.mp4'):
            continue
        # ``or 0`` guards against JSON nulls for the dimensions.
        is_portrait = (vf.get('height') or 0) > (vf.get('width') or 0)
        if chosen is None or (prefer_vertical and is_portrait):
            chosen = vf
    return chosen


def _search_pexels(keyword: str, api_key: str, prefer_vertical: bool = True) -> list[dict]:
    """Search the Pexels Video API for ``keyword``.

    The keyword is first passed through ``_sanitize_keyword``. Any failure
    (network, HTTP, malformed payload) is logged and yields an empty list so
    the caller can fall through to its next clip source.

    Args:
        keyword: Search keyword from the script.
        api_key: Pexels API key, sent in the ``Authorization`` header.
        prefer_vertical: Request portrait orientation and favour portrait
            renditions; otherwise request landscape.

    Returns:
        A list of ``{url, width, height, duration}`` dicts, one per video
        that has an eligible rendition.
    """
    import urllib.parse
    import urllib.request

    keyword = _sanitize_keyword(keyword)
    params = urllib.parse.urlencode({
        'query': keyword,
        'orientation': 'portrait' if prefer_vertical else 'landscape',
        'size': 'medium',
        'per_page': 10,
    })
    req = urllib.request.Request(
        f'{PEXELS_VIDEO_URL}?{params}',
        headers={
            'Authorization': api_key,
            'User-Agent': 'Mozilla/5.0 (compatible; BlogWriter/1.0)',
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        results = []
        for v in data.get('videos', []):
            best = _pick_pexels_file(v.get('video_files') or [], prefer_vertical)
            if best:
                results.append({
                    'url': best['link'],
                    'width': best.get('width', 0),
                    'height': best.get('height', 0),
                    'duration': v.get('duration', 5),
                })
        return results
    except Exception as e:
        # Deliberate best-effort boundary: a failed search must not abort
        # the clip pipeline.
        logger.warning(f'Pexels 검색 실패 ({keyword}): {e}')
        return []
# ─── Pixabay ─────────────────────────────────────────────────
def _has_blocked_tag(tags: str, blocked_terms) -> bool:
    """Return True if any blocked term appears in ``tags`` as a whole word/phrase.

    Matching is case-insensitive and allows a trailing plural 's'. A term
    embedded in a longer word does not count, so 'ui' does not block
    'building' and 'app' does not block 'happy'.
    """
    import re  # local import: the module header does not import ``re``

    haystack = tags.lower()
    return any(
        re.search(rf'(?<![a-z0-9]){re.escape(term)}s?(?![a-z0-9])', haystack)
        for term in blocked_terms
    )


def _search_pixabay(keyword: str, api_key: str, prefer_vertical: bool = True) -> list[dict]:
    """Search the Pixabay Video API for ``keyword``.

    The keyword is first passed through ``_sanitize_keyword``. Hits tagged
    with any ``_SCREEN_BLOCK_TAGS`` term are skipped. For each remaining hit
    the first available MP4 rendition is taken, trying medium, large, small,
    then tiny. Any failure is logged and yields an empty list.

    Args:
        keyword: Search keyword from the script.
        api_key: Pixabay API key, sent as the ``key`` query parameter.
        prefer_vertical: Accepted for signature parity with
            ``_search_pexels``; this function does not use it.

    Returns:
        A list of ``{url, width, height, duration}`` dicts.
    """
    import urllib.parse

    keyword = _sanitize_keyword(keyword)
    params = urllib.parse.urlencode({
        'key': api_key,
        'q': keyword,
        'video_type': 'film',
        'per_page': 10,
    })
    req = urllib.request.Request(
        f'{PIXABAY_VIDEO_URL}?{params}',
        headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogWriter/1.0)'},
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
        results = []
        for hit in data.get('hits', []):
            # Tag-based filtering of screen-recording / UI footage.
            if _has_blocked_tag(hit.get('tags') or '', _SCREEN_BLOCK_TAGS):
                continue
            videos = hit.get('videos') or {}
            # Prefer the medium rendition.
            for quality in ('medium', 'large', 'small', 'tiny'):
                vf = videos.get(quality)
                if not vf:
                    continue
                link = vf.get('url') or ''
                # Test the URL path so a query string after '.mp4' does not
                # disqualify the rendition.
                if urllib.parse.urlsplit(link).path.lower().endswith('.mp4'):
                    results.append({
                        'url': link,
                        'width': vf.get('width', 0),
                        'height': vf.get('height', 0),
                        'duration': hit.get('duration', 5),
                    })
                    break
        return results
    except Exception as e:
        # Deliberate best-effort boundary: a failed search must not abort
        # the clip pipeline.
        logger.warning(f'Pixabay 검색 실패 ({keyword}): {e}')
        return []
# ─── 다운로드 ─────────────────────────────────────────────────
def _download_clip(url: str, dest: Path) -> bool:
    """Download ``url`` to ``dest``.

    The response is streamed to a sibling ``<name>.part`` file and renamed
    onto ``dest`` only after the transfer completes, so a whole video is
    never held in memory and ``dest`` never contains a truncated download.

    Args:
        url: Direct URL of the clip.
        dest: Destination file path.

    Returns:
        True on success; False on any failure (logged as a warning).
    """
    import shutil  # local import: the module header does not import ``shutil``

    partial = dest.with_name(dest.name + '.part')
    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=60) as resp, partial.open('wb') as fh:
            shutil.copyfileobj(resp, fh)
        size_kb = partial.stat().st_size // 1024
        partial.replace(dest)
        logger.debug(f'클립 다운로드: {dest.name} ({size_kb}KB)')
        return True
    except Exception as e:
        # Deliberate best-effort boundary: the caller just moves on to the
        # next candidate clip.
        logger.warning(f'클립 다운로드 실패 ({url[:60]}): {e}')
        try:
            partial.unlink(missing_ok=True)
        except OSError:
            pass  # leftover .part file is harmless; the failure is already logged
        return False
# ─── FFmpeg 전처리 ────────────────────────────────────────────
def _prepare_clip(input_path: Path, output_path: Path, duration: float = 6.0) -> bool:
    """Re-encode a clip as 1080x1920 portrait video, trimmed to ``duration``.

    The picture is scaled (aspect ratio kept) until it covers 1080x1920 and
    then cropped to exactly that size. Output is 30 fps H.264 in yuv420p
    with the audio track dropped.

    Args:
        input_path: Source video file.
        output_path: Destination MP4 (overwritten if present).
        duration: Maximum output length in seconds.

    Returns:
        True on success; False if FFmpeg exits with an error, exceeds the
        120 s timeout, or cannot be launched.
    """
    ffmpeg = _get_ffmpeg()
    cmd = [
        ffmpeg, '-y',
        '-i', str(input_path),
        '-t', str(duration),
        '-vf', (
            'scale=1080:1920:force_original_aspect_ratio=increase,'
            'crop=1080:1920'
        ),
        '-r', '30',
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        '-pix_fmt', 'yuv420p',
        '-an',  # drop the stock clip's audio
        str(output_path),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=120)
    except subprocess.CalledProcessError as e:
        logger.warning(f'클립 전처리 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # A timeout or a missing FFmpeg binary used to propagate and abort
        # the pipeline; report it as an ordinary failure instead.
        logger.warning(f'클립 전처리 실패: {e}')
        return False
    return True
def _kenburns_image(image_path: Path, output_path: Path, duration: float = 6.0) -> bool:
    """Turn a still image into a slow-zoom (Ken Burns) 1080x1920 MP4.

    The image is scaled to 1200x2134 and fed through FFmpeg's ``zoompan``,
    zooming in by 0.0008 per frame up to 1.1x around the centre. Output is
    30 fps H.264 in yuv420p with no audio.

    Args:
        image_path: Source still image.
        output_path: Destination MP4 (overwritten if present).
        duration: Output length in seconds.

    Returns:
        True on success; False if FFmpeg exits with an error, exceeds the
        120 s timeout, or cannot be launched.
    """
    ffmpeg = _get_ffmpeg()
    frames = int(duration * 30)  # 30 fps output
    cmd = [
        ffmpeg, '-y',
        '-loop', '1',
        '-i', str(image_path),
        '-vf', (
            'scale=1200:2134,'
            "zoompan=z='min(zoom+0.0008,1.1)':"
            f'd={frames}:'
            "x='iw/2-(iw/zoom/2)':"
            "y='ih/2-(ih/zoom/2)':"
            's=1080x1920'
        ),
        '-t', str(duration),
        '-r', '30',
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        # Same pixel format as _prepare_clip, so every generated clip is
        # encoded alike regardless of the still image's colour format.
        '-pix_fmt', 'yuv420p',
        '-an',
        str(output_path),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=120)
    except subprocess.CalledProcessError as e:
        logger.warning(f'Ken Burns 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # A timeout or a missing FFmpeg binary used to propagate; report it
        # as an ordinary failure instead.
        logger.warning(f'Ken Burns 실패: {e}')
        return False
    return True
# ─── 캐릭터 오버레이 ──────────────────────────────────────────
def _overlay_character(
    clip_path: Path,
    output_path: Path,
    char_png: str,
    char_cfg: dict,
) -> bool:
    """Composite a character PNG onto the bottom-right of a clip.

    Args:
        clip_path: Source video clip.
        output_path: Destination MP4 (overwritten if present).
        char_png: Path of the character image; empty or missing means no-op.
        char_cfg: Placement options ``scale_width`` (default 300),
            ``margin_right`` (default 40) and ``margin_bottom`` (default 250).

    Returns:
        True on success; False if the PNG is missing, or if FFmpeg exits
        with an error, exceeds the 120 s timeout, or cannot be launched.
    """
    if not char_png or not Path(char_png).exists():
        return False
    ffmpeg = _get_ffmpeg()
    scale_w = char_cfg.get('scale_width', 300)
    mr = char_cfg.get('margin_right', 40)
    mb = char_cfg.get('margin_bottom', 250)
    # Overlay position: right edge minus the margin.
    overlay_x = f'W-{scale_w}-{mr}'
    # The character's height is approximated as twice its scaled width.
    overlay_y = f'H-{scale_w * 2}-{mb}'
    cmd = [
        ffmpeg, '-y',
        '-i', str(clip_path),
        '-i', char_png,
        '-filter_complex', (
            f'[1:v]scale={scale_w}:-1[char];'
            f'[0:v][char]overlay={overlay_x}:{overlay_y}'
        ),
        '-c:v', 'libx264', '-crf', '23', '-preset', 'fast',
        # Explicit pixel format, consistent with _prepare_clip.
        '-pix_fmt', 'yuv420p',
        '-an',
        str(output_path),
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, timeout=120)
    except subprocess.CalledProcessError as e:
        logger.warning(f'캐릭터 오버레이 실패: {e.stderr.decode(errors="ignore")[:200]}')
        return False
    except (subprocess.TimeoutExpired, OSError) as e:
        # A timeout or a missing FFmpeg binary used to propagate; report it
        # as an ordinary failure so the caller keeps the un-overlaid clip.
        logger.warning(f'캐릭터 오버레이 실패: {e}')
        return False
    return True
# ─── 메인 엔트리포인트 ────────────────────────────────────────
def fetch_clips(
    script: dict,
    manifest: dict,
    output_dir: Path,
    timestamp: str,
    cfg: Optional[dict] = None,
) -> list[Path]:
    """
    Build the list of ready-to-use clips for one short from script keywords.

    Sources are tried in order until ``max_clips`` is reached: user video
    clips, user images, background + expression composites, Pexels stock,
    Pixabay stock. If fewer than ``min_clips`` were gathered, motion clips
    made from the background (or a generated solid-colour image) are added.

    Args:
        script: Script dict; only ``keywords`` is read here.
        manifest: Asset-resolver result; reads ``user_clips``, ``user_images``,
            ``background``, ``expressions``, ``pose`` and
            ``character.default_pose``.
        output_dir: Parent directory for clips (e.g. data/shorts/clips/).
        timestamp: Name of the sub-directory created under ``output_dir``.
        cfg: shorts_config.json dict; loaded via ``_load_config()`` when None.
    Returns:
        Clip paths, at most ``visuals.max_clips`` (default 5). The fallback
        step tries to reach ``visuals.min_clips`` (default 3) but may stop
        short if clip generation fails.
    """
    if cfg is None:
        cfg = _load_config()
    clips_dir = output_dir / timestamp
    clips_dir.mkdir(parents=True, exist_ok=True)
    vis_cfg = cfg.get('visuals', {})
    min_clips = vis_cfg.get('min_clips', 3)
    max_clips = vis_cfg.get('max_clips', 5)
    prefer_vertical = vis_cfg.get('prefer_vertical', True)
    # The config stores the *names* of the environment variables holding the API keys.
    pexels_key = os.environ.get(vis_cfg.get('pexels_api_key_env', 'PEXELS_API_KEY'), '')
    pixabay_key = os.environ.get(vis_cfg.get('pixabay_api_key_env', 'PIXABAY_API_KEY'), '')
    char_overlay_cfg = cfg.get('assets', {}).get('character_overlay', {})
    overlay_enabled = char_overlay_cfg.get('enabled', True)
    # Expression order: hook/body/closer → one per segment.
    expressions = manifest.get('expressions', [])
    char_pose = manifest.get('pose', manifest.get('character', {}).get('default_pose', ''))
    # MotionEngine turns still images into motion clips. Per the original note it
    # picks from 7 motion patterns, avoiding the previous two — not verifiable here.
    from shorts.motion_engine import MotionEngine
    motion = MotionEngine()
    result_clips: list[Path] = []
    # 1. User-provided video clips.
    for i, user_clip in enumerate(manifest.get('user_clips', [])[:max_clips]):
        out = clips_dir / f'clip_{i+1:02d}.mp4'
        if _prepare_clip(Path(user_clip), out):
            result_clips.append(out)
    # 2. User-provided images → MotionEngine clip.
    for i, user_img in enumerate(manifest.get('user_images', [])[:max_clips]):
        if len(result_clips) >= max_clips:
            break
        out = clips_dir / f'clip_img_{i+1:02d}.mp4'
        result_path = motion.apply(str(user_img), duration=6.0, output_path=str(out))
        if result_path:
            result_clips.append(Path(result_path))
    # 3. Character asset + background compositing.
    background = manifest.get('background', '')
    if background and Path(background).exists() and len(result_clips) < max_clips:
        # Background image → MotionEngine clip, one per expression (first three only).
        for seg_idx, expr_png in enumerate(expressions[:3]):
            if len(result_clips) >= max_clips:
                break
            out_bg = clips_dir / f'clip_bg_{seg_idx+1:02d}.mp4'
            if motion.apply(str(background), duration=6.0, output_path=str(out_bg)):
                # Expression overlay; the plain background clip is kept if it fails.
                if expr_png and Path(expr_png).exists():
                    out_char = clips_dir / f'clip_char_{seg_idx+1:02d}.mp4'
                    if _overlay_character(out_bg, out_char, expr_png, char_overlay_cfg):
                        out_bg.unlink(missing_ok=True)
                        result_clips.append(out_char)
                    else:
                        result_clips.append(out_bg)
                else:
                    result_clips.append(out_bg)
    # 4. Pexels stock clips (at most two per keyword).
    keywords = script.get('keywords', [])
    # Running counter used only to number stock/fallback file names.
    stock_idx = len(result_clips)
    for keyword in keywords:
        if len(result_clips) >= max_clips:
            break
        if pexels_key:
            videos = _search_pexels(keyword, pexels_key, prefer_vertical)
            for v in videos[:2]:
                if len(result_clips) >= max_clips:
                    break
                stock_idx += 1
                raw = clips_dir / f'raw_{stock_idx:02d}.mp4'
                if _download_clip(v['url'], raw):
                    out = clips_dir / f'clip_stock_{stock_idx:02d}.mp4'
                    if _prepare_clip(raw, out):
                        raw.unlink(missing_ok=True)
                        # Character overlay (pose); the plain clip is kept if it fails.
                        if overlay_enabled and char_pose and Path(char_pose).exists():
                            out_o = clips_dir / f'clip_o_{stock_idx:02d}.mp4'
                            if _overlay_character(out, out_o, char_pose, char_overlay_cfg):
                                out.unlink(missing_ok=True)
                                result_clips.append(out_o)
                            else:
                                result_clips.append(out)
                        else:
                            result_clips.append(out)
                    else:
                        raw.unlink(missing_ok=True)
    # 5. Pixabay fallback (at most two per keyword; no character overlay is applied here).
    for keyword in keywords:
        if len(result_clips) >= max_clips:
            break
        if pixabay_key:
            videos = _search_pixabay(keyword, pixabay_key, prefer_vertical)
            for v in videos[:2]:
                if len(result_clips) >= max_clips:
                    break
                stock_idx += 1
                raw = clips_dir / f'raw_px_{stock_idx:02d}.mp4'
                if _download_clip(v['url'], raw):
                    out = clips_dir / f'clip_px_{stock_idx:02d}.mp4'
                    if _prepare_clip(raw, out):
                        raw.unlink(missing_ok=True)
                        result_clips.append(out)
                    else:
                        raw.unlink(missing_ok=True)
    # 6. Fallback: simple clips made from the background image alone.
    if len(result_clips) < min_clips:
        logger.warning(f'클립 부족 ({len(result_clips)}/{min_clips}) — 배경 반복 폴백')
        fallback_img = Path(background) if background and Path(background).exists() else None
        if not fallback_img:
            # Generate a solid-colour background.
            fallback_img = clips_dir / 'fallback_bg.png'
            _generate_solid_bg(fallback_img)
        while len(result_clips) < min_clips:
            stock_idx += 1
            out = clips_dir / f'clip_fallback_{stock_idx:02d}.mp4'
            result_path = motion.apply(str(fallback_img), duration=6.0, output_path=str(out))
            if result_path:
                result_clips.append(Path(result_path))
            else:
                # Stop rather than loop forever when clip generation keeps failing.
                break
    logger.info(f'클립 준비 완료: {len(result_clips)}개 → {clips_dir}')
    return result_clips[:max_clips]
def _generate_solid_bg(output_path: Path, color: str = '#1a1a2e') -> None:
    """Write a 1080x1920 single-colour image to ``output_path``.

    Pillow is tried first; if importing or using it fails for any reason,
    FFmpeg's ``color`` source renders one frame instead. A failure of the
    FFmpeg attempt is only logged.
    """
    try:
        from PIL import Image
        canvas = Image.new('RGB', (1080, 1920), color)
        canvas.save(str(output_path))
    except Exception:
        pillow_ok = False
    else:
        pillow_ok = True
    if pillow_ok:
        return

    # Pillow unavailable or failed — fall back to FFmpeg.
    ffmpeg_bin = _get_ffmpeg()
    hex_color = color.lstrip("#")
    command = [
        ffmpeg_bin, '-y', '-f', 'lavfi',
        '-i', f'color=c={hex_color}:size=1080x1920:rate=1',
        '-frames:v', '1', str(output_path),
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, timeout=30)
    except Exception as e:
        logger.warning(f'단색 배경 생성 실패: {e}')