"""Per-tenant routing metrics kept in Redis.

Counters are stored in one hash per tenant; response times are additionally
written to a size-capped sorted set from which a p95 value is read.
"""

import json
import time
import uuid
from typing import Optional

from app.services.routing import RoutingResult

# Known counter field names. ``MetricsCollector.record`` below writes all of
# them except "blocked_attempts".
METRIC_KEYS = [
    "total_count",
    "faq_hit_count",
    "rag_hit_count",
    "llm_hit_count",
    "fallback_count",
    "timeout_count",
    "response_ms_sum",
    "blocked_attempts",
]

# Key suffix and maximum size of the sorted set that backs the p95 estimate.
P95_SORTED_SET = "response_ms_p95_buf"
P95_MAX_SIZE = 10000

# Maps ``RoutingResult.source`` to the counter field incremented for it.
_SOURCE_TO_KEY = {
    "faq": "faq_hit_count",
    "rag": "rag_hit_count",
    "llm": "llm_hit_count",
    "fallback": "fallback_count",
}

# (rate name in the overview, counter field it is derived from), in output
# order. "llm_hit_count" is tracked but has no rate entry here.
_RATE_FIELDS = (
    ("faq_hit_rate", "faq_hit_count"),
    ("rag_hit_rate", "rag_hit_count"),
    ("fallback_rate", "fallback_count"),
    ("timeout_rate", "timeout_count"),
)


class MetricsCollector:
    """Record routing results per tenant and summarise them.

    ``redis_client`` is used through ``pipeline()``, ``hgetall()``,
    ``zcard()`` and ``zrange()``; the last three and ``pipeline().execute()``
    are awaited, so an asyncio-compatible client is expected.
    """

    def __init__(self, redis_client):
        self.redis = redis_client

    def _prefix(self, tenant_id: str) -> str:
        """Return the key of the tenant's counter hash."""
        return f"tenant:{tenant_id}:metrics"

    def _p95_key(self, tenant_id: str) -> str:
        """Return the key of the tenant's response-time sorted set."""
        return f"tenant:{tenant_id}:{P95_SORTED_SET}"

    async def record(self, tenant_id: str, result: RoutingResult) -> None:
        """Queue and execute all metric updates for one routing result.

        Increments ``total_count``, the per-source counter (when
        ``result.source`` is a known source), ``timeout_count`` (when
        ``result.is_timeout`` is truthy) and ``response_ms_sum``, then adds
        ``result.elapsed_ms`` to the p95 sorted set and trims that set to
        ``P95_MAX_SIZE`` entries. Everything is sent in one pipeline.
        """
        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        pipe = self.redis.pipeline()
        pipe.hincrby(prefix, "total_count", 1)

        source_key = _SOURCE_TO_KEY.get(result.source)
        if source_key:
            pipe.hincrby(prefix, source_key, 1)
        if result.is_timeout:
            pipe.hincrby(prefix, "timeout_count", 1)
        pipe.hincrby(prefix, "response_ms_sum", result.elapsed_ms)

        # p95 sorted set: score=elapsed_ms, member=unique id. A bare
        # timestamp can repeat (coarse clocks, several workers), and ZADD on
        # an existing member overwrites its score, silently dropping a
        # sample; the random suffix keeps every sample distinct.
        member = f"{time.time_ns()}-{uuid.uuid4().hex}"
        pipe.zadd(p95_key, {member: result.elapsed_ms})
        # NOTE(review): ranks follow the score (elapsed_ms), so this trim
        # evicts the smallest latencies, not the oldest samples. Once the cap
        # is reached the retained window skews towards slow responses.
        # Fixing that needs a different storage layout; left unchanged here.
        pipe.zremrangebyrank(p95_key, 0, -(P95_MAX_SIZE + 1))
        await pipe.execute()

    async def get_overview(self, tenant_id: str) -> dict:
        """Return counters, hit/timeout rates, average and p95 latency.

        Returns:
            A dict with keys ``"counts"`` (counter field -> int),
            ``"rates"`` (percentages rounded to 2 decimals, all ``0.0`` when
            nothing was recorded), ``"avg_ms"`` (integer division of
            ``response_ms_sum`` by ``total_count``) and ``"p95_ms"``
            (``0`` when the sorted set is empty).
        """
        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        raw = await self.redis.hgetall(prefix)
        # A client created without decode_responses=True returns bytes field
        # names; normalise to str so the lookups below actually match.
        counts = {
            (field.decode() if isinstance(field, bytes) else field): int(value)
            for field, value in (raw or {}).items()
        }

        total = counts.get("total_count", 0)
        avg_ms = counts.get("response_ms_sum", 0) // max(total, 1)

        # Compute p95: read the single element at the 95% rank position.
        p95_ms = 0
        buf_size = await self.redis.zcard(p95_key)
        if buf_size > 0:
            p95_idx = max(0, int(buf_size * 0.95) - 1)
            p95_items = await self.redis.zrange(
                p95_key, p95_idx, p95_idx, withscores=True
            )
            if p95_items:
                p95_ms = int(p95_items[0][1])

        rates = {
            rate: round(counts.get(field, 0) / total * 100, 2) if total > 0 else 0.0
            for rate, field in _RATE_FIELDS
        }

        return {
            "counts": counts,
            "rates": rates,
            "avg_ms": avg_ms,
            "p95_ms": p95_ms,
        }