feat(v3): PR 6 — HookOptimizer + MicroSignals (3 signals)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
17
bots/quality/__init__.py
Normal file
17
bots/quality/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
"""
|
||||||
|
bots/quality
|
||||||
|
Quality signal computation for shorts content.
|
||||||
|
|
||||||
|
V3.0 signals:
|
||||||
|
- motion_variation_score
|
||||||
|
- script_diversity_score
|
||||||
|
- tts_cost_efficiency
|
||||||
|
|
||||||
|
V3.1+ additions:
|
||||||
|
- semantic_visual_score
|
||||||
|
- caption_overlap_score
|
||||||
|
- pacing_variation_score
|
||||||
|
"""
|
||||||
|
from .micro_signals import compute_signal, SIGNALS_V1
|
||||||
|
|
||||||
|
__all__ = ['compute_signal', 'SIGNALS_V1']
|
||||||
215
bots/quality/micro_signals.py
Normal file
215
bots/quality/micro_signals.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
"""
|
||||||
|
bots/quality/micro_signals.py
|
||||||
|
Micro-failure quality signals for shorts content.
|
||||||
|
|
||||||
|
V3.0 scope: 3 signals
|
||||||
|
- motion_variation_score: detects repetitive motion patterns
|
||||||
|
- script_diversity_score: detects structural overlap with recent scripts
|
||||||
|
- tts_cost_efficiency: monitors TTS credit usage
|
||||||
|
|
||||||
|
Each signal returns a float 0.0-1.0 where:
|
||||||
|
- 1.0 = perfect / no issue
|
||||||
|
- 0.0 = critical problem
|
||||||
|
- threshold = action trigger point
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable, Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Registry of the V3.0 quality signals, keyed by signal name.
# Each entry carries:
#   description      - human-readable summary of what the signal watches
#   threshold        - value at which the action is triggered
#   action           - name of the remediation to request when triggered
#   higher_is_better - True: trigger when value < threshold;
#                      False: trigger when value > threshold
SIGNALS_V1 = {
    'motion_variation_score': dict(
        description='Consecutive clips using same motion pattern',
        threshold=0.6,
        action='auto_fix',  # pick a different pattern automatically
        higher_is_better=True,
    ),
    'script_diversity_score': dict(
        description='Script structure overlap with last 7 days',
        threshold=0.5,
        action='regenerate',  # request a different structure from the LLM
        higher_is_better=True,
    ),
    'tts_cost_efficiency': dict(
        description='TTS credit usage vs monthly limit',
        threshold=0.8,
        action='switch_engine',  # downgrade to local TTS
        higher_is_better=False,  # lower usage = better
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
def compute_signal(signal_name: str, **kwargs) -> float:
    """
    Compute a quality signal value.

    Args:
        signal_name: One of SIGNALS_V1 keys
        **kwargs: Signal-specific inputs (see individual compute functions)

    Returns: float 0.0-1.0. If the underlying computation raises, a neutral
        value is returned instead: 1.0 for higher-is-better signals and 0.0
        for lower-is-better signals, so a failure never triggers an action.

    Raises: ValueError if signal_name unknown
    """
    if signal_name not in SIGNALS_V1:
        raise ValueError(f'Unknown signal: {signal_name}. Available: {list(SIGNALS_V1.keys())}')

    compute_fns = {
        'motion_variation_score': _compute_motion_variation,
        'script_diversity_score': _compute_script_diversity,
        'tts_cost_efficiency': _compute_tts_cost_efficiency,
    }

    fn = compute_fns[signal_name]
    try:
        value = fn(**kwargs)
        logger.debug(f'[품질] {signal_name} = {value:.3f}')
        return value
    except Exception as e:
        # Best-effort: a broken signal must not abort the pipeline.
        logger.warning(f'[품질] 신호 계산 실패 ({signal_name}): {e}')
        # "Neutral" depends on the signal's direction. A lower-is-better
        # signal fires when value > threshold, so returning 1.0 for it would
        # trigger its action on every failure.
        higher_is_better = SIGNALS_V1[signal_name].get('higher_is_better', True)
        return 1.0 if higher_is_better else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def check_and_act(signal_name: str, value: float) -> dict:
    """
    Decide whether a signal value crosses its configured threshold.

    Args:
        signal_name: Key into SIGNALS_V1. Unknown names never trigger.
        value: Signal value to compare against the threshold.

    Returns: {
        'triggered': bool,
        'action': str or None (the configured action, only when triggered),
        'value': float (echoed back),
        'threshold': float (0 for unknown signals),
    }
    """
    if signal_name in SIGNALS_V1:
        spec = SIGNALS_V1[signal_name]
        limit = spec['threshold']

        # Higher-is-better signals fire when they drop below the threshold;
        # lower-is-better signals fire when they rise above it.
        fired = value < limit if spec.get('higher_is_better', True) else value > limit

        outcome = {'triggered': fired, 'action': None, 'value': value, 'threshold': limit}
        if fired:
            outcome['action'] = spec['action']
        return outcome

    return {'triggered': False, 'action': None, 'value': value, 'threshold': 0}
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_motion_variation(clips: list, **kwargs) -> float:
|
||||||
|
"""
|
||||||
|
Compute motion variation score.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
clips: list of dicts with 'pattern' key, e.g. [{'pattern': 'ken_burns_in'}, ...]
|
||||||
|
|
||||||
|
Returns: 0.0-1.0 diversity score
|
||||||
|
"""
|
||||||
|
if not clips or len(clips) < 2:
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
patterns = [c.get('pattern', '') for c in clips if c.get('pattern')]
|
||||||
|
if not patterns:
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
# Count consecutive same-pattern pairs
|
||||||
|
consecutive_same = sum(
|
||||||
|
1 for i in range(len(patterns) - 1)
|
||||||
|
if patterns[i] == patterns[i+1]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Unique patterns ratio
|
||||||
|
unique_ratio = len(set(patterns)) / len(patterns)
|
||||||
|
consecutive_penalty = consecutive_same / max(len(patterns) - 1, 1)
|
||||||
|
|
||||||
|
score = unique_ratio * (1 - consecutive_penalty)
|
||||||
|
return round(min(1.0, max(0.0, score)), 3)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_script_diversity(script: dict, history: list = None, **kwargs) -> float:
|
||||||
|
"""
|
||||||
|
Compute script structure diversity vs recent history.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
script: Current script dict with 'hook', 'body', 'closer'
|
||||||
|
history: List of recent scripts (last 7 days), each same format
|
||||||
|
|
||||||
|
Returns: 0.0-1.0 diversity score (1.0 = very diverse)
|
||||||
|
"""
|
||||||
|
if not history:
|
||||||
|
return 1.0
|
||||||
|
|
||||||
|
# Compare script structure fingerprints
|
||||||
|
def _fingerprint(s: dict) -> tuple:
|
||||||
|
hook = s.get('hook', '')
|
||||||
|
body = s.get('body', [])
|
||||||
|
closer = s.get('closer', '')
|
||||||
|
return (
|
||||||
|
len(hook) // 10, # rough length bucket
|
||||||
|
len(body), # number of body sentences
|
||||||
|
hook[:5] if hook else '', # hook start
|
||||||
|
)
|
||||||
|
|
||||||
|
current_fp = _fingerprint(script)
|
||||||
|
|
||||||
|
overlaps = sum(
|
||||||
|
1 for h in history
|
||||||
|
if _fingerprint(h) == current_fp
|
||||||
|
)
|
||||||
|
|
||||||
|
overlap_rate = overlaps / len(history)
|
||||||
|
return round(1.0 - overlap_rate, 3)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_tts_cost_efficiency(usage: float, limit: float, **kwargs) -> float:
|
||||||
|
"""
|
||||||
|
Compute TTS cost efficiency.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
usage: Characters used this period
|
||||||
|
limit: Monthly/daily character limit
|
||||||
|
|
||||||
|
Returns: ratio (usage/limit), where > threshold triggers engine switch
|
||||||
|
"""
|
||||||
|
if limit <= 0:
|
||||||
|
return 0.0
|
||||||
|
return round(min(1.0, usage / limit), 3)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Standalone test ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import sys

    # Manual smoke test: run the module with --test to print one sample
    # evaluation per signal.
    if '--test' in sys.argv:
        print("=== Micro Signals Test ===")

        # Motion variation: the first two clips repeat the same pattern.
        sample_patterns = ('ken_burns_in', 'ken_burns_in', 'pan_left', 'pan_right')
        sample_clips = [{'pattern': name} for name in sample_patterns]
        mv = compute_signal('motion_variation_score', clips=sample_clips)
        mv_check = check_and_act('motion_variation_score', mv)
        print(f"motion_variation_score = {mv:.3f} (triggered: {mv_check['triggered']}, action: {mv_check['action']})")

        # Script diversity: one history entry with a near-identical hook.
        sample_script = {'hook': '이거 모르면 손해', 'body': ['첫째', '둘째', '셋째'], 'closer': '구독'}
        sample_history = [
            {'hook': '이거 모르면 손해2', 'body': ['a', 'b', 'c'], 'closer': '팔로우'},
        ]
        sd = compute_signal('script_diversity_score', script=sample_script, history=sample_history)
        sd_check = check_and_act('script_diversity_score', sd)
        print(f"script_diversity_score = {sd:.3f} (triggered: {sd_check['triggered']})")

        # TTS cost: 85% of the quota used.
        tce = compute_signal('tts_cost_efficiency', usage=8500, limit=10000)
        tce_check = check_and_act('tts_cost_efficiency', tce)
        print(f"tts_cost_efficiency = {tce:.3f} (triggered: {tce_check['triggered']}, action: {tce_check['action']})")
|
||||||
252
bots/shorts/hook_optimizer.py
Normal file
252
bots/shorts/hook_optimizer.py
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
"""
|
||||||
|
bots/shorts/hook_optimizer.py
|
||||||
|
Hook text quality scoring and optimization.
|
||||||
|
|
||||||
|
HookOptimizer:
|
||||||
|
- score(hook): 0-100 quality score based on pattern match + keyword strength
|
||||||
|
- optimize(hook, article, max_attempts): regenerate if score < 70
|
||||||
|
|
||||||
|
V3.0 scope: pattern matching + LLM regeneration via existing writer_bot
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Hook patterns: category name -> template strings. ``{N}`` is a placeholder
# that matches any run of digits when a hook is scored.
HOOK_PATTERNS = {
    'disbelief': [
        '이거 모르면 손해',
        '이게 무료라고?',
        '이걸 아직도 모른다고?',
        '믿기 힘들지만 사실입니다',
        '실화입니다',
    ],
    'warning': [
        '절대 하지 마세요',
        '이것만은 피하세요',
        '지금 당장 멈추세요',
        '알면 충격받을 수 있습니다',
    ],
    'number': [
        '단 {N}초면',
        '{N}%가 모르는',
        '{N}가지 방법',
        '{N}배 빠른',
        '상위 {N}%',
    ],
    'question': [
        '왜 아무도 안 알려줄까?',
        '진짜일까?',
        '이게 가능한 이유',
        '어떻게 하는 걸까?',
    ],
    'urgency': [
        '지금 당장',
        '오늘 안에',
        '지금 안 보면 후회',
        '당장 시작해야 하는 이유',
    ],
}

# High-value keywords that boost score (Korean viral hook words)
HIGH_VALUE_KEYWORDS = [
    '무료', '공짜', '비밀', '충격', '실화', '진짜', '불법',
    '모르는', '숨겨진', '알려지지 않은', '믿기지 않는', '손해',
    '당장', '지금', '반드시', '절대', '꼭', '필수',
    '돈', '수익', '수입', '부자', '성공', '자유',
    '초보', '누구나', '쉬운', '간단한',
]

# Weak words that reduce score
WEAK_KEYWORDS = [
    '알아보겠습니다', '살펴보겠습니다', '설명드리겠습니다',
    '안녕하세요', '오늘은', '이번에는', '먼저',
]


class HookOptimizer:
    """
    Scores and optimizes hook text for shorts videos.

    Score = pattern_score (0-50) + keyword_score (0-30) + length_score (0-20)
    Threshold: 70 by default — below this triggers regeneration

    Scoring is read-only: ``score()`` never changes the recently-used pattern
    history, so scoring the same hook twice gives the same result. The history
    is updated by ``optimize()`` (for the hook it returns) or by an explicit
    ``record_usage()`` call.
    """

    # Number of most-recent pattern categories that count as "recently used".
    _RECENT_WINDOW = 3

    def __init__(self, threshold: int = 70):
        self.threshold = threshold
        self._recently_used_patterns: list[str] = []  # avoid repetition

    def score(self, hook: str) -> int:
        """
        Score a hook text from 0-100 without modifying any state.

        Components:
        - pattern_score (0-50): does it match a known viral pattern?
        - keyword_score (0-30): does it contain high-value keywords?
        - length_score (0-20): optimal length (15-30 chars = max)

        Returns 0 for an empty or None hook.
        """
        if not hook:
            return 0

        total = (
            self._score_pattern(hook)
            + self._score_keywords(hook)
            + self._score_length(hook)
        )
        return min(100, total)

    def record_usage(self, hook: str) -> None:
        """
        Remember the pattern category of a hook that was actually used.

        Only full template matches are recorded, and a category that is
        already inside the recent window is not appended again. The history is
        trimmed to the window size because older entries are never read.
        """
        if not hook:
            return
        category, is_full = self._match_pattern(hook)
        if category is None or not is_full:
            return
        if category in self._recently_used_patterns[-self._RECENT_WINDOW:]:
            return
        self._recently_used_patterns.append(category)
        del self._recently_used_patterns[:-self._RECENT_WINDOW]

    def optimize(
        self,
        hook: str,
        article: dict,
        max_attempts: int = 3,
        llm_fn=None,
    ) -> str:
        """
        Score hook. If score < threshold, regenerate up to max_attempts times.

        Args:
            hook: Initial hook text
            article: Article dict with keys: title, body, corner, key_points
            max_attempts: Max regeneration attempts
            llm_fn: Optional callable(prompt) -> str for LLM regeneration.
                    If None, returns original hook (LLM not available).

        Returns: Best hook found (may still be below threshold if all attempts fail).
            The pattern category of the returned hook is recorded as recently used.
        """
        best = hook
        best_score = self.score(hook)
        preview = (hook or '')[:30]

        logger.info(f'[훅] 초기 점수: {best_score}/100 — "{preview}..."')

        if best_score >= self.threshold:
            self.record_usage(hook)
            return hook

        if llm_fn is None:
            logger.warning(f'[훅] 점수 부족 ({best_score}/100) — LLM 없음, 원본 사용')
            self.record_usage(hook)
            return hook

        # The prompt always shows a hook together with *its own* score.
        current, current_score = hook, best_score

        for attempt in range(max_attempts):
            prompt = self._build_regeneration_prompt(current, article, current_score)

            try:
                raw = llm_fn(prompt)
                lines = raw.strip().splitlines() if raw else []
                candidate = lines[0].strip() if lines else ''  # Take first line
            except Exception as e:
                # Best-effort: keep whatever we have if the LLM call fails.
                logger.warning(f'[훅] LLM 재생성 실패 (시도 {attempt+1}): {e}')
                break

            if not candidate:
                # Empty output gives nothing to score or to iterate from.
                continue

            candidate_score = self.score(candidate)
            logger.info(f'[훅] 시도 {attempt+1}: {candidate_score}/100 — "{candidate[:30]}"')

            if candidate_score > best_score:
                best, best_score = candidate, candidate_score

            if best_score >= self.threshold:
                break

            current, current_score = candidate, candidate_score

        logger.info(f'[훅] 최종 점수: {best_score}/100 — "{(best or "")[:30]}"')
        self.record_usage(best)
        return best

    @staticmethod
    def _match_pattern(hook: str) -> tuple:
        """
        Find the pattern category a hook matches.

        Returns: (category, is_full) where is_full is True for a full template
            match (``{N}`` filled with digits). If no template matches fully,
            the first category whose template core (template without ``{N}``,
            longer than 3 chars) appears in the hook is returned with
            is_full=False. (None, False) when nothing matches.
        """
        partial = None
        for category, templates in HOOK_PATTERNS.items():
            for template in templates:
                pattern_re = re.escape(template).replace(r'\{N\}', r'\d+')
                if re.search(pattern_re, hook):
                    return category, True
                if partial is None:
                    core = template.replace('{N}', '').strip()
                    if len(core) > 3 and core in hook:
                        # Keep looking: a later template may match fully.
                        partial = category
        return partial, False

    def _score_pattern(self, hook: str) -> int:
        """
        Check if hook matches known viral patterns. Max 50 points.

        50 for a full match, 30 if that category was used recently, 35 for a
        partial match, 0 otherwise. Does not modify the usage history.
        """
        category, is_full = self._match_pattern(hook)
        if category is None:
            return 0
        if not is_full:
            return 35
        # Recently used pattern gets reduced score
        if category in self._recently_used_patterns[-self._RECENT_WINDOW:]:
            return 30
        return 50

    def _score_keywords(self, hook: str) -> int:
        """Score based on high-value/weak keywords. Max 30 points."""
        # +10 per high-value keyword, capped at 30 before penalties apply.
        bonus = min(30, 10 * sum(1 for kw in HIGH_VALUE_KEYWORDS if kw in hook))
        # -15 per weak keyword.
        penalty = 15 * sum(1 for kw in WEAK_KEYWORDS if kw in hook)
        return max(0, min(30, bonus - penalty))

    def _score_length(self, hook: str) -> int:
        """Score based on hook length. Max 20 points. Optimal: 15-30 chars."""
        length = len(hook)
        if 15 <= length <= 30:
            return 20
        if 10 <= length < 15 or 30 < length <= 40:
            return 10
        if length < 10:
            return 5
        return 0  # > 40 chars

    def _build_regeneration_prompt(self, hook: str, article: dict, current_score: int) -> str:
        """
        Build LLM prompt for hook regeneration.

        The prompt states the instance's own threshold and lists the pattern
        categories currently inside the recent-usage window.
        """
        title = article.get('title', '')
        corner = article.get('corner', '')
        key_points = article.get('key_points', [])
        recently_used = ', '.join(self._recently_used_patterns[-self._RECENT_WINDOW:]) if self._recently_used_patterns else '없음'

        points_str = '\n'.join(f'- {p}' for p in key_points[:3]) if key_points else ''

        return f"""다음 쇼츠 영상의 훅 텍스트를 개선해주세요.

현재 훅: {hook}
현재 점수: {current_score}/100 (기준: {self.threshold}점 이상)

콘텐츠 정보:
- 제목: {title}
- 코너: {corner}
- 핵심 포인트: {points_str}

요구사항:
1. 15-30자 이내
2. 다음 패턴 중 하나 사용: 충격/의심/경고/숫자/긴급
3. 최근 사용된 패턴 제외: {recently_used}
4. 한국어로 작성
5. 훅 텍스트만 출력 (설명 없이)

개선된 훅:"""
|
||||||
|
|
||||||
|
|
||||||
|
# ── Standalone test ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import sys

    # Manual smoke test: run the module with --test to print sample scores
    # and the number of templates per pattern category.
    if '--test' in sys.argv:
        scorer = HookOptimizer()
        samples = [
            '이거 모르면 손해입니다!',
            '안녕하세요 오늘은 AI에 대해 설명드리겠습니다',
            '100%가 모르는 무료 도구',
            '지금 당장 이것만은 절대 하지 마세요',
            '어',
        ]
        print("=== Hook Optimizer Test ===")
        for text in samples:
            points = scorer.score(text)
            print(f'점수 {points:3d}/100: "{text}"')

        print()
        print("Pattern test:")
        for category, templates in HOOK_PATTERNS.items():
            print(f" {category}: {len(templates)} patterns")
|
||||||
Reference in New Issue
Block a user