Files
Gov-chat-bot/backend/app/services/metrics.py
2026-03-26 12:49:43 +09:00

96 lines
2.9 KiB
Python

import json
from typing import Optional
from app.services.routing import RoutingResult
# Names of the per-tenant metric fields known to this module.
# NOTE(review): "blocked_attempts" is not written anywhere in this file —
# presumably it is maintained by another component; confirm.
METRIC_KEYS = [
    # Request volume and per-source hit counters.
    "total_count",
    "faq_hit_count",
    "rag_hit_count",
    "llm_hit_count",
    "fallback_count",
    # Failure / latency accounting.
    "timeout_count",
    "response_ms_sum",
    "blocked_attempts",
]
# Suffix of the per-tenant Redis sorted set that buffers response times
# (score = elapsed milliseconds) for the p95 calculation.
P95_SORTED_SET = "response_ms_p95_buf"

# Maximum number of members kept in that sorted set; anything beyond this
# is trimmed each time a new sample is recorded.
P95_MAX_SIZE = 10_000
# Maps a routing result's ``source`` value to the hash field that counts it.
# A source that is not listed here only contributes to "total_count".
_SOURCE_TO_KEY = {
    "faq": "faq_hit_count",
    "rag": "rag_hit_count",
    "llm": "llm_hit_count",
    "fallback": "fallback_count",
}
class MetricsCollector:
    """Record and read per-tenant request metrics stored in Redis.

    Counters are kept in one hash per tenant (``tenant:{tenant_id}:metrics``).
    Response times are additionally kept in a per-tenant sorted set whose
    score is the elapsed time in milliseconds, so a percentile can be read
    by rank.
    """

    def __init__(self, redis_client):
        """Store the Redis client used for all reads and writes.

        Args:
            redis_client: Client whose ``hgetall``/``zcard``/``zrange`` and
                pipeline ``execute`` are awaitable (presumably
                ``redis.asyncio.Redis`` — confirm against the caller).
        """
        self.redis = redis_client

    def _prefix(self, tenant_id: str) -> str:
        """Return the key of the tenant's counter hash."""
        return f"tenant:{tenant_id}:metrics"

    def _p95_key(self, tenant_id: str) -> str:
        """Return the key of the tenant's response-time sorted set."""
        return f"tenant:{tenant_id}:{P95_SORTED_SET}"

    async def record(self, tenant_id: str, result: RoutingResult) -> None:
        """Record one routed request for ``tenant_id``.

        All updates are queued on a single pipeline and sent with one
        ``execute`` call.

        Args:
            tenant_id: Tenant whose metrics are updated.
            result: Routing outcome; ``source``, ``is_timeout`` and
                ``elapsed_ms`` are read from it.
        """
        import time

        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        pipe = self.redis.pipeline()
        pipe.hincrby(prefix, "total_count", 1)

        # Sources without a dedicated counter only bump total_count.
        source_key = _SOURCE_TO_KEY.get(result.source)
        if source_key:
            pipe.hincrby(prefix, source_key, 1)
        if result.is_timeout:
            pipe.hincrby(prefix, "timeout_count", 1)
        pipe.hincrby(prefix, "response_ms_sum", result.elapsed_ms)

        # p95 sorted set — score=elapsed_ms, member=unique id
        member = f"{time.time_ns()}"
        pipe.zadd(p95_key, {member: result.elapsed_ms})
        # Keep at most P95_MAX_SIZE members (the highest-ranked ones).
        # NOTE(review): ranks follow the score, so once the buffer is full
        # this evicts the *fastest* samples rather than the oldest, which
        # skews the percentile upward over time — confirm whether a
        # time-ordered window is wanted instead.
        pipe.zremrangebyrank(p95_key, 0, -(P95_MAX_SIZE + 1))

        await pipe.execute()

    async def get_overview(self, tenant_id: str) -> dict:
        """Return counters, hit/timeout rates, average and p95 latency.

        Args:
            tenant_id: Tenant whose metrics are read.

        Returns:
            A dict with ``counts`` (hash fields as ints), ``rates``
            (percentages rounded to two decimals), ``avg_ms`` (integer
            mean of the recorded response times) and ``p95_ms`` (nearest-rank
            95th percentile of the buffered response times, 0 when empty).
        """
        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        raw = await self.redis.hgetall(prefix)
        # A client created without decode_responses=True returns bytes field
        # names; normalise them to str so the lookups below actually match.
        counts = {
            (field.decode() if isinstance(field, bytes) else field): int(value)
            for field, value in (raw or {}).items()
        }

        total = counts.get("total_count", 0)
        avg_ms = counts.get("response_ms_sum", 0) // max(total, 1)

        # p95 calculation
        p95_ms = 0
        sample_count = await self.redis.zcard(p95_key)
        if sample_count > 0:
            # Nearest-rank percentile: 1-based rank = ceil(0.95 * n).
            # Integer arithmetic avoids float rounding; for n >= 1 the
            # resulting 0-based index is always >= 0.
            p95_idx = (sample_count * 95 + 99) // 100 - 1
            p95_items = await self.redis.zrange(
                p95_key, p95_idx, p95_idx, withscores=True
            )
            if p95_items:
                p95_ms = int(p95_items[0][1])

        # NOTE(review): llm_hit_count is recorded but no llm_hit_rate is
        # reported here — confirm whether that omission is intentional.
        rate_sources = (
            ("faq_hit_rate", "faq_hit_count"),
            ("rag_hit_rate", "rag_hit_count"),
            ("fallback_rate", "fallback_count"),
            ("timeout_rate", "timeout_count"),
        )
        if total > 0:
            rates = {
                rate_name: round(counts.get(count_name, 0) / total * 100, 2)
                for rate_name, count_name in rate_sources
            }
        else:
            rates = {rate_name: 0.0 for rate_name, _ in rate_sources}

        return {
            "counts": counts,
            "rates": rates,
            "avg_ms": avg_ms,
            "p95_ms": p95_ms,
        }