Files
Gov-chat-bot/backend/app/services/routing.py
2026-03-26 12:49:43 +09:00

244 lines
8.5 KiB
Python

import asyncio
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class RoutingResult:
    """Outcome of routing a single utterance, plus metadata about how it was answered."""

    answer: str
    tier: str  # one of 'A' | 'B' | 'C' | 'D'
    source: str  # one of 'faq' | 'rag' | 'llm' | 'fallback'
    faq_id: Optional[str] = None
    doc_id: Optional[str] = None
    doc_name: Optional[str] = None
    doc_date: Optional[str] = None
    score: float = 0.0
    elapsed_ms: int = 0
    is_timeout: bool = False
    request_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict view of the result.

        ``doc_name`` and ``doc_date`` are not exported as top-level keys;
        they only appear inside ``citations``, which holds a single entry
        when ``doc_name`` is set and is empty otherwise. A missing
        ``doc_date`` is rendered as an empty string.
        """
        exported = (
            "answer",
            "tier",
            "source",
            "faq_id",
            "doc_id",
            "score",
            "elapsed_ms",
            "is_timeout",
            "request_id",
        )
        payload = {key: getattr(self, key) for key in exported}
        payload["citations"] = (
            [{"doc": self.doc_name, "date": self.doc_date or ""}]
            if self.doc_name
            else []
        )
        return payload
class ResponseRouter:
    """Route an utterance through answer tiers A -> C -> B, falling back to tier D.

    * Tier A: FAQ match via ``FAQSearchService``.
    * Tier C: LLM rewrite grounded in RAG search results (tried before B).
    * Tier B: templated answer built from RAG search results.
    * Tier D: static contact message built from ``tenant_config``.
    """

    # 4.5 s budget: Kakao's 5 s response limit minus a 500 ms safety margin.
    TIMEOUT_MS = 4500

    def __init__(self, tenant_config: dict, providers: dict):
        """Store the tenant settings and the provider registry.

        Args:
            tenant_config: Tenant settings; this class reads the keys
                ``tenant_name``, ``phone_number`` and ``fallback_dept``.
            providers: Provider registry; this class looks up the keys
                ``embedding``, ``vectordb`` and ``llm``.
        """
        self.tenant_config = tenant_config
        self.providers = providers

    async def route(
        self,
        tenant_id: str,
        utterance: str,
        user_key: str,
        request_id: Optional[str] = None,
        db=None,
    ) -> "RoutingResult":
        """Answer ``utterance`` within ``TIMEOUT_MS``.

        Runs the tier chain under ``asyncio.wait_for``. If the budget is
        exceeded, the chain is cancelled and a tier-D result flagged with
        ``is_timeout=True`` is returned instead. Other exceptions raised by
        a tier are not caught here.
        """
        import time

        start = time.monotonic()
        try:
            return await asyncio.wait_for(
                self._try_tiers(tenant_id, utterance, user_key, request_id, db, start),
                timeout=self.TIMEOUT_MS / 1000,
            )
        except asyncio.TimeoutError:
            elapsed = int((time.monotonic() - start) * 1000)
            return self._tier_d(tenant_id, elapsed, is_timeout=True, request_id=request_id)

    async def _try_tiers(
        self,
        tenant_id: str,
        utterance: str,
        user_key: str,
        request_id: Optional[str],
        db,
        start: float,
    ) -> "RoutingResult":
        """Try tiers A, C, B in that order and return the first hit, else tier D.

        Tier C runs before tier B on purpose: when an LLM is active the
        grounded rewrite is preferred over the templated answer, and tier B
        is only reached when tier C yields nothing.

        ``start`` is the ``time.monotonic()`` value captured by ``route``; it
        is used to stamp ``elapsed_ms`` on the returned result.
        """
        import time

        attempts = (self._try_tier_a, self._try_tier_c, self._try_tier_b)
        for attempt in attempts:
            result = await attempt(tenant_id, utterance, db)
            if result is not None:
                result.elapsed_ms = int((time.monotonic() - start) * 1000)
                result.request_id = request_id
                return result

        elapsed = int((time.monotonic() - start) * 1000)
        return self._tier_d(tenant_id, elapsed, request_id=request_id)

    async def _try_tier_a(self, tenant_id: str, utterance: str, db) -> Optional["RoutingResult"]:
        """Tier A: answer from a matching FAQ entry.

        Returns ``None`` when the embedding/vectordb providers or ``db`` are
        missing, or when ``FAQSearchService.search`` finds no match.
        NOTE(review): the original docstring cites a similarity threshold of
        0.85; it is presumably enforced inside ``FAQSearchService`` and is
        not visible here.
        """
        embedding_provider = self.providers.get("embedding")
        vectordb_provider = self.providers.get("vectordb")
        if embedding_provider is None or vectordb_provider is None or db is None:
            return None

        from app.services.faq_search import FAQSearchService

        service = FAQSearchService(embedding_provider, vectordb_provider, db)
        match = await service.search(tenant_id, utterance)
        if match is None:
            return None
        faq, score = match

        # Bump the FAQ hit counter. This is awaited inline (it is NOT
        # fire-and-forget), so its latency counts against the routing budget
        # and an exception raised here propagates to the caller.
        await service.increment_hit(faq.id)

        citation_date = faq.updated_at.strftime("%Y.%m") if faq.updated_at else ""
        return RoutingResult(
            answer=faq.answer,
            tier="A",
            source="faq",
            faq_id=faq.id,
            doc_name=f"FAQ: {faq.question[:30]}",
            doc_date=citation_date,
            score=score,
        )

    async def _try_tier_c(self, tenant_id: str, utterance: str, db) -> Optional["RoutingResult"]:
        """Tier C: LLM rewrite grounded in RAG search results.

        Returns ``None`` when any required provider or ``db`` is missing,
        when the LLM provider is a ``NullLLMProvider``, when the RAG search
        returns nothing, or when the LLM returns ``None``.
        """
        llm_provider = self.providers.get("llm")
        embedding_provider = self.providers.get("embedding")
        vectordb_provider = self.providers.get("vectordb")
        if llm_provider is None or embedding_provider is None or vectordb_provider is None or db is None:
            return None

        # A NullLLMProvider means the LLM is disabled: skip this tier at once.
        from app.providers.llm import NullLLMProvider

        if isinstance(llm_provider, NullLLMProvider):
            return None

        # Same RAGSearchService.search call that tier B uses.
        from app.services.rag_search import RAGSearchService

        rag_service = RAGSearchService(embedding_provider, vectordb_provider, db)
        rag_results = await rag_service.search(tenant_id, utterance)
        if not rag_results:
            # No evidence -> never call the LLM (hallucination guard, "P6").
            return None

        # Ground the LLM on at most the first three retrieved chunks.
        context_chunks = [r.chunk_text for r in rag_results[:3]]
        context_str = "\n---\n".join(context_chunks)
        tenant_name = self.tenant_config.get("tenant_name", "")
        name_prefix = f"{tenant_name}" if tenant_name else ""
        system_prompt = (
            f"당신은 {name_prefix}AI 안내 도우미입니다.\n"
            f"반드시 아래 근거 문서의 내용만을 바탕으로 답변하세요.\n"
            f"근거 없는 내용은 절대 추측하지 마세요.\n\n"
            f"근거 문서:\n{context_str}"
        )
        answer = await llm_provider.generate(
            system_prompt=system_prompt,
            user_message=utterance,
            context_chunks=context_chunks,
        )
        if answer is None:
            # LLM produced nothing: the caller (_try_tiers) moves on to tier B.
            return None

        best = rag_results[0]
        return RoutingResult(
            answer=answer,
            tier="C",
            source="llm",
            doc_id=best.doc.id,
            doc_name=best.doc_name,
            doc_date=best.doc_date,
            score=best.score,
        )

    async def _try_tier_b(self, tenant_id: str, utterance: str, db) -> Optional["RoutingResult"]:
        """Tier B: templated answer built from RAG search results.

        Returns ``None`` when the embedding/vectordb providers or ``db`` are
        missing, or when the RAG search returns nothing.
        NOTE(review): the original docstring cites a similarity threshold of
        0.70; it is presumably enforced inside ``RAGSearchService`` and is
        not visible here.
        """
        embedding_provider = self.providers.get("embedding")
        vectordb_provider = self.providers.get("vectordb")
        if embedding_provider is None or vectordb_provider is None or db is None:
            return None

        from app.services.rag_search import RAGSearchService

        service = RAGSearchService(embedding_provider, vectordb_provider, db)
        results = await service.search(tenant_id, utterance)
        if not results:
            return None

        best = results[0]
        answer = service.build_answer(utterance, results)
        return RoutingResult(
            answer=answer,
            tier="B",
            source="rag",
            doc_id=best.doc.id,
            doc_name=best.doc_name,
            doc_date=best.doc_date,
            score=best.score,
        )

    @staticmethod
    def _fallback_message(name, contact, phone) -> str:
        """Build the tier-D contact message from whichever details are set.

        An empty ``name`` is left out entirely, so the message never carries
        a stray double space before the department name.
        """
        if phone and contact:
            target = f"{name} {contact}" if name else contact
            return f"해당 문의는 {target}({phone})로 연락해 주세요."
        if phone:
            if name:
                return f"해당 문의는 {name}({phone})로 연락해 주세요."
            return f"해당 문의는 {phone}로 연락해 주세요."
        if name:
            return f"죄송합니다. {name}에 직접 문의해 주세요."
        return "죄송합니다. 해당 내용을 찾을 수 없습니다. 담당자에게 직접 문의해 주세요."

    def _tier_d(
        self,
        tenant_id: str,
        elapsed_ms: int,
        is_timeout: bool = False,
        request_id: Optional[str] = None,
    ) -> "RoutingResult":
        """Tier D: static fallback pointing the user at a human contact.

        Reads only the in-memory ``tenant_config`` (no DB lookup), so it is
        safe to call after the routing budget has already been spent.
        ``tenant_id`` is accepted for signature symmetry and is not used.
        """
        phone = self.tenant_config.get("phone_number", "")
        contact = self.tenant_config.get("fallback_dept", "")
        name = self.tenant_config.get("tenant_name", "")
        return RoutingResult(
            answer=self._fallback_message(name, contact, phone),
            tier="D",
            source="fallback",
            elapsed_ms=elapsed_ms,
            is_timeout=is_timeout,
            request_id=request_id,
        )