feat(v3): PR 6 — HookOptimizer + MicroSignals (3 signals)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:56:34 +09:00
parent 834577fc07
commit 0dedb0d7f8
3 changed files with 484 additions and 0 deletions
@@ -0,0 +1,17 @@
+"""
+bots/quality
+Quality signal computation for shorts content.
+
+V3.0 signals:
+  - motion_variation_score
+  - script_diversity_score
+  - tts_cost_efficiency
+
+V3.1+ additions:
+  - semantic_visual_score
+  - caption_overlap_score
+  - pacing_variation_score
+"""
+from .micro_signals import compute_signal, SIGNALS_V1
+
+__all__ = ['compute_signal', 'SIGNALS_V1']
@@ -0,0 +1,215 @@
+"""
+bots/quality/micro_signals.py
+Micro-failure quality signals for shorts content.
+
+V3.0 scope: 3 signals
+  - motion_variation_score: detects repetitive motion patterns
+  - script_diversity_score: detects structural overlap with recent scripts
+  - tts_cost_efficiency: monitors TTS credit usage
+
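+Each signal returns a float 0.0-1.0. For signals with higher_is_better=True:
+  - 1.0 = perfect / no issue
+  - 0.0 = critical problem
+For tts_cost_efficiency (higher_is_better=False) the value is a usage ratio,
+so lower is better. In both cases, threshold = action trigger point.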
+"""
+import logging
+from pathlib import Path
+from typing import Callable, Any
+
+logger = logging.getLogger(__name__)
+
+# Registry of the V3.0 signals. Each entry carries the trigger threshold, the
+# action name reported when the threshold is crossed, and the polarity flag
+# that check_and_act uses to decide which side of the threshold is "bad".
+SIGNALS_V1 = {
+    # Action: pick a different motion pattern automatically.
+    'motion_variation_score': dict(
+        description='Consecutive clips using same motion pattern',
+        threshold=0.6,
+        action='auto_fix',
+        higher_is_better=True,
+    ),
+    # Action: request a different structure from the LLM.
+    'script_diversity_score': dict(
+        description='Script structure overlap with last 7 days',
+        threshold=0.5,
+        action='regenerate',
+        higher_is_better=True,
+    ),
+    # Action: downgrade to local TTS. Lower usage is better for this signal.
+    'tts_cost_efficiency': dict(
+        description='TTS credit usage vs monthly limit',
+        threshold=0.8,
+        action='switch_engine',
+        higher_is_better=False,
+    ),
+}
+
+
+def compute_signal(signal_name: str, **kwargs) -> float:
+    """
+    Compute a quality signal value.
+
+    Dispatches to the private compute function registered for ``signal_name``
+    and forwards ``kwargs`` to it unchanged.
+
+    Args:
+        signal_name: One of SIGNALS_V1 keys
+        **kwargs: Signal-specific inputs (see individual compute functions)
+
+    Returns: float 0.0-1.0. If the compute function raises, the failure is
+        logged and a neutral value is returned instead: 1.0 for signals where
+        higher is better, 0.0 for signals where lower is better, so that a
+        failed computation never crosses the signal's threshold.
+
+    Raises: ValueError if signal_name unknown
+    """
+    if signal_name not in SIGNALS_V1:
+        raise ValueError(f'Unknown signal: {signal_name}. Available: {list(SIGNALS_V1.keys())}')
+
+    compute_fns = {
+        'motion_variation_score': _compute_motion_variation,
+        'script_diversity_score': _compute_script_diversity,
+        'tts_cost_efficiency': _compute_tts_cost_efficiency,
+    }
+
+    fn = compute_fns[signal_name]
+    try:
+        value = fn(**kwargs)
+        logger.debug(f'[품질] {signal_name} = {value:.3f}')
+        return value
+    except Exception as e:
+        # Best-effort by design: a broken signal must not abort the pipeline.
+        logger.warning(f'[품질] 신호 계산 실패 ({signal_name}): {e}')
+        # "Neutral" depends on polarity. A fixed 1.0 would exceed the
+        # threshold of lower-is-better signals (e.g. tts_cost_efficiency)
+        # and trigger their action on every computation error.
+        higher_is_better = SIGNALS_V1[signal_name].get('higher_is_better', True)
+        return 1.0 if higher_is_better else 0.0
+
+
+def check_and_act(signal_name: str, value: float) -> dict:
+    """
+    Compare a signal value with its configured threshold.
+
+    A higher-is-better signal triggers when the value falls below the
+    threshold; a lower-is-better signal triggers when the value rises above
+    it. A value exactly equal to the threshold never triggers.
+
+    Args:
+        signal_name: Key into SIGNALS_V1. Unknown names do not raise; they
+            yield a non-triggered result with threshold 0.
+        value: Previously computed signal value.
+
+    Returns: {
+        'triggered': bool,
+        'action': str or None (the configured action, only when triggered),
+        'value': float,
+        'threshold': float,
+    }
+    """
+    spec = SIGNALS_V1.get(signal_name)
+    if spec is None:
+        return {'triggered': False, 'action': None, 'value': value, 'threshold': 0}
+
+    limit = spec['threshold']
+    # Polarity defaults to higher-is-better when the entry omits the flag.
+    fired = value < limit if spec.get('higher_is_better', True) else value > limit
+
+    outcome = {'triggered': fired, 'action': None, 'value': value, 'threshold': limit}
+    if fired:
+        outcome['action'] = spec['action']
+    return outcome
+
+
+def _compute_motion_variation(clips: list, **kwargs) -> float:
+    """
+    Score how varied the motion patterns of a clip sequence are.
+
+    The score is (unique patterns / total patterns) multiplied by
+    (1 - share of adjacent pairs that repeat the same pattern). Clips with a
+    missing or empty 'pattern' are dropped before scoring.
+
+    Args:
+        clips: list of dicts with 'pattern' key, e.g. [{'pattern': 'ken_burns_in'}, ...]
+        **kwargs: Accepted and ignored.
+
+    Returns: 0.0-1.0 diversity score, rounded to 3 decimals. 1.0 when fewer
+        than two clips are given or no clip carries a pattern.
+    """
+    if not clips or len(clips) == 1:
+        return 1.0
+
+    labels = [label for label in (clip.get('pattern') for clip in clips) if label]
+    if not labels:
+        return 1.0
+
+    total = len(labels)
+
+    # Adjacent pairs that reuse the same pattern.
+    repeats = sum(1 for prev, nxt in zip(labels, labels[1:]) if prev == nxt)
+    pair_count = max(total - 1, 1)  # avoid dividing by zero for a single label
+
+    distinct_share = len(set(labels)) / total
+    raw = distinct_share * (1 - repeats / pair_count)
+
+    clamped = max(0.0, raw)
+    clamped = min(1.0, clamped)
+    return round(clamped, 3)
+
+
+def _compute_script_diversity(script: dict, history: list = None, **kwargs) -> float:
+    """
+    Compute script structure diversity vs recent history.
+
+    Each script is reduced to a coarse fingerprint: hook length bucket
+    (length // 10), number of body entries, and the first 5 characters of
+    the hook. The 'closer' field is not part of the fingerprint. The score
+    is 1 minus the share of history entries whose fingerprint equals the
+    current script's.
+
+    Args:
+        script: Current script dict with 'hook', 'body', 'closer'
+        history: List of recent scripts (last 7 days), each same format
+        **kwargs: Accepted and ignored.
+
+    Returns: 0.0-1.0 diversity score (1.0 = very diverse), rounded to 3
+        decimals. 1.0 when history is empty or None.
+    """
+    if not history:
+        return 1.0
+
+    def _fingerprint(s: dict) -> tuple:
+        # `or` also covers keys that are present but set to None, which
+        # would otherwise raise TypeError in len().
+        hook = s.get('hook') or ''
+        body = s.get('body') or []
+        return (
+            len(hook) // 10,  # rough length bucket
+            len(body),        # number of body sentences
+            hook[:5],         # hook start ('' for an empty hook)
+        )
+
+    current_fp = _fingerprint(script)
+    overlaps = sum(1 for past in history if _fingerprint(past) == current_fp)
+
+    overlap_rate = overlaps / len(history)
+    return round(1.0 - overlap_rate, 3)
+
+
+def _compute_tts_cost_efficiency(usage: float, limit: float, **kwargs) -> float:
+    """
+    Compute the TTS usage ratio for the current period.
+
+    Unlike the other signals, a LOWER value is better here: the result is
+    the consumed share of the quota.
+
+    Args:
+        usage: Characters used this period
+        limit: Monthly/daily character limit
+        **kwargs: Accepted and ignored.
+
+    Returns: usage/limit clamped to 0.0-1.0 and rounded to 3 decimals, where
+        > threshold triggers engine switch. 0.0 when limit is zero or
+        negative, because no ratio can be formed.
+    """
+    if limit <= 0:
+        return 0.0
+    # Clamp on both sides: negative usage (e.g. a refund or a bad counter)
+    # must not produce a value outside the documented 0.0-1.0 range.
+    ratio = max(0.0, min(1.0, usage / limit))
+    return round(ratio, 3)
+
+
+# ── Standalone test ──────────────────────────────────────────────
+
+if __name__ == '__main__':
+    import sys
+
+    # The self-check only runs on explicit request (--test flag).
+    if '--test' in sys.argv:
+        print("=== Micro Signals Test ===")
+
+        # Motion variation: the first two clips repeat the same pattern.
+        pattern_names = ('ken_burns_in', 'ken_burns_in', 'pan_left', 'pan_right')
+        test_clips = [{'pattern': name} for name in pattern_names]
+        mv = compute_signal('motion_variation_score', clips=test_clips)
+        mv_check = check_and_act('motion_variation_score', mv)
+        print(f"motion_variation_score = {mv:.3f} (triggered: {mv_check['triggered']}, action: {mv_check['action']})")
+
+        # Script diversity: one history entry with a similar hook.
+        current_script = {'hook': '이거 모르면 손해', 'body': ['첫째', '둘째', '셋째'], 'closer': '구독'}
+        past_script = {'hook': '이거 모르면 손해2', 'body': ['a', 'b', 'c'], 'closer': '팔로우'}
+        sd = compute_signal('script_diversity_score', script=current_script, history=[past_script])
+        sd_check = check_and_act('script_diversity_score', sd)
+        print(f"script_diversity_score = {sd:.3f} (triggered: {sd_check['triggered']})")
+
+        # TTS cost: 85% of the quota consumed.
+        tce = compute_signal('tts_cost_efficiency', usage=8500, limit=10000)
+        tce_check = check_and_act('tts_cost_efficiency', tce)
+        print(f"tts_cost_efficiency = {tce:.3f} (triggered: {tce_check['triggered']}, action: {tce_check['action']})")