Initial commit: import from sinmb79/Gov-chat-bot

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
airkjw
2026-03-26 12:49:43 +09:00
commit a16c972dbb
104 changed files with 8063 additions and 0 deletions
View File
+39
View File
@@ -0,0 +1,39 @@
"""
감사 로그 기록 헬퍼.
표준 action: faq.create|faq.update|faq.delete
doc.upload|doc.approve|doc.delete
user.restrict|user.unblock
crawler.approve|crawler.reject
config.update
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.audit import AuditLog
async def log_action(
    db: AsyncSession,
    tenant_id: str,
    actor_id: str,
    actor_type: str,  # 'admin_user' | 'system_admin'
    action: str,
    target_type: Optional[str] = None,
    target_id: Optional[str] = None,
    diff: Optional[dict] = None,
    ip_address: Optional[str] = None,
) -> AuditLog:
    """Write a single audit-log row and return it.

    An ``AuditLog`` is built from the arguments, added to ``db`` and
    committed right away, so the call also commits whatever else is pending
    on the same session.

    Args:
        db: Async session used to persist the row.
        tenant_id: Tenant the action belongs to.
        actor_id: Identifier of whoever performed the action.
        actor_type: ``'admin_user'`` or ``'system_admin'``.
        action: Action name, e.g. ``faq.create`` or ``doc.upload``.
        target_type: Optional kind of the object acted upon.
        target_id: Optional identifier of the object acted upon.
        diff: Optional dict describing the change.
        ip_address: Optional client address.

    Returns:
        The ``AuditLog`` instance that was added and committed.
    """
    columns = {
        "tenant_id": tenant_id,
        "actor_id": actor_id,
        "actor_type": actor_type,
        "action": action,
        "target_type": target_type,
        "target_id": target_id,
        "diff": diff,
        "ip_address": ip_address,
    }
    record = AuditLog(**columns)
    db.add(record)
    await db.commit()
    return record
+44
View File
@@ -0,0 +1,44 @@
"""
민원 이력 DB 저장.
개인정보 원칙: utterance_masked(마스킹), user_key(SHA-256 해시 16자리).
원문 미저장.
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.complaint import ComplaintLog
from app.services.masking import mask_text, hash_user_key
from app.services.routing import RoutingResult
async def log_complaint(
    db: AsyncSession,
    tenant_id: str,
    raw_utterance: str,
    raw_user_id: str,
    result: RoutingResult,
    channel: str = "kakao",
) -> ComplaintLog:
    """Store one complaint-history row in ``ComplaintLog`` and return it.

    Privacy handling done here before anything is persisted:
    - the user id is passed through ``hash_user_key``;
    - the utterance is passed through ``mask_text`` and cut to 1000 characters;
    - the raw utterance and raw user id are not written to the row.

    Args:
        db: Async session used to persist the row (committed immediately).
        tenant_id: Tenant the request belongs to.
        raw_utterance: Unmasked user text.
        raw_user_id: Unhashed user identifier.
        result: Routing outcome whose ids, tier, source and timing are copied.
        channel: Originating channel name.

    Returns:
        The ``ComplaintLog`` instance that was added and committed.
    """
    hashed_user = hash_user_key(raw_user_id)
    masked_utterance = mask_text(raw_utterance)[:1000]

    record = ComplaintLog(
        tenant_id=tenant_id,
        user_key=hashed_user,
        utterance_masked=masked_utterance,
        channel=channel,
        request_id=result.request_id,
        response_tier=result.tier,
        response_source=result.source,
        faq_id=result.faq_id,
        doc_id=result.doc_id,
        response_ms=result.elapsed_ms,
        is_timeout=result.is_timeout,
    )
    db.add(record)
    await db.commit()
    return record
+92
View File
@@ -0,0 +1,92 @@
"""
웹 크롤러 — httpx + BeautifulSoup4.
robots.txt 준수. CrawlerURL 기반.
"""
from typing import Optional
from datetime import datetime, timezone
import httpx
from bs4 import BeautifulSoup
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import CrawlerURL, Document
# Headers sent with the crawler's HTTP requests; the User-Agent names the bot
# and points at the project repository.
CRAWLER_HEADERS = {
    "User-Agent": "SmartBot-KR/1.0 (+https://github.com/sinmb79/Gov-chat-bot)",
}
# Timeout passed to the httpx client when fetching a page, in seconds.
CRAWL_TIMEOUT = 15  # seconds
async def check_robots_txt(base_url: str, target_path: str) -> bool:
    """Return whether robots.txt permits crawling ``target_path``.

    Fetches ``/robots.txt`` from the scheme and host of ``base_url`` and applies
    a simple prefix check of the ``Disallow`` rules found under
    ``User-agent: *`` or ``User-agent: smartbot-kr``. ``Allow`` rules and
    wildcards are not interpreted.

    Field names and user-agent values are compared case-insensitively, while
    rule paths are compared case-sensitively against ``target_path``. Text
    after ``#`` on a line is treated as a comment and ignored.

    Args:
        base_url: Any URL on the target site; only scheme and host are used.
        target_path: Path component of the URL about to be crawled.

    Returns:
        ``False`` when a matching ``Disallow`` prefix is found. ``True``
        otherwise, including when robots.txt is missing (non-200) or the check
        itself fails for any reason (best-effort: allow when unverifiable).
    """
    try:
        from urllib.parse import urlparse

        parsed = urlparse(base_url)
        robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
        async with httpx.AsyncClient(timeout=5) as client:
            resp = await client.get(robots_url, headers=CRAWLER_HEADERS)
        if resp.status_code != 200:
            return True  # no robots.txt: treated as allowed

        in_block = False
        for raw_line in resp.text.splitlines():
            # Drop trailing comments so they never become part of a rule path.
            line = raw_line.split("#", 1)[0].strip()
            if ":" not in line:
                continue
            field_name, value = line.split(":", 1)
            field_name = field_name.strip().lower()
            value = value.strip()
            if field_name == "user-agent":
                in_block = value.lower() in ("*", "smartbot-kr")
            elif in_block and field_name == "disallow":
                # Keep the rule's original case: URL paths are case-sensitive.
                if value and target_path.startswith(value):
                    return False
        return True
    except Exception:
        return True  # cannot verify: allow
async def crawl_url(url: str) -> Optional[str]:
    """Download ``url`` and return its text, or ``None`` on any failure.

    The robots.txt check runs first; a disallowed path yields ``None``.
    Responses whose content type contains ``html`` are parsed with
    BeautifulSoup, stripped of script/style/nav/footer/header elements and
    flattened to newline-separated text. Any other content type is returned
    as the response text unchanged. Every exception is swallowed and reported
    as ``None``.
    """
    try:
        from urllib.parse import urlparse

        path = urlparse(url).path or "/"
        permitted = await check_robots_txt(url, path)
        if not permitted:
            return None  # disallowed by robots.txt

        async with httpx.AsyncClient(
            timeout=CRAWL_TIMEOUT,
            follow_redirects=True,
            headers=CRAWLER_HEADERS,
        ) as http:
            response = await http.get(url)
            response.raise_for_status()

            if "html" not in response.headers.get("content-type", ""):
                return response.text

            soup = BeautifulSoup(response.content, "html.parser")
            for node in soup(["script", "style", "nav", "footer", "header"]):
                node.decompose()
            return soup.get_text(separator="\n", strip=True)
    except Exception:
        return None
class CrawlerService:
    """Runs a crawl for a ``CrawlerURL`` row and records when it happened."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def run(self, crawler_url: CrawlerURL, tenant_id: str) -> Optional[str]:
        """Crawl ``crawler_url.url`` and return the extracted text.

        ``last_crawled`` is stamped with the current UTC time and committed
        whether or not the crawl produced text. ``tenant_id`` is accepted but
        not used by this method.

        Returns:
            The extracted text, or ``None`` when the crawl failed.
        """
        extracted = await crawl_url(crawler_url.url)

        # Record the attempt time even when nothing was extracted.
        crawled_at = datetime.now(timezone.utc)
        crawler_url.last_crawled = crawled_at
        await self.db.commit()

        return extracted
@@ -0,0 +1,88 @@
"""
문서 처리 파이프라인:
파싱 → 청킹 → 임베딩 → VectorDB 저장 → Document 레코드 업데이트
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import Document
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
from app.services.parsers.text_parser import extract_text, chunk_text
class DocumentProcessor:
    """Parse, chunk, embed and index an uploaded document.

    Pipeline: text extraction -> chunking -> embedding -> vector-DB upsert ->
    ``Document`` row update.
    """

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        vectordb_provider: VectorDBProvider,
        db: AsyncSession,
    ):
        self.embedding = embedding_provider
        self.vectordb = vectordb_provider
        self.db = db

    async def _fail(self, doc: Document, status: str) -> int:
        """Record ``status`` on ``doc``, commit, and return 0 chunks."""
        doc.status = status
        await self.db.commit()
        return 0

    async def process(self, tenant_id: str, doc: Document, content: bytes) -> int:
        """Index ``content`` for ``doc`` and return the number of chunks stored.

        On failure the document status is set and committed, and 0 is
        returned:
        - ``parse_failed``: no text could be extracted, or no chunks resulted;
        - ``embedding_unavailable``: the provider raised ``NotImplementedError``;
        - ``embedding_failed``: the provider raised any other exception.

        On success the status becomes ``processed`` and ``chunk_count`` is set.
        Exceptions raised by the vector-DB upsert are not caught here.
        """
        # 1. Extract text
        text = extract_text(content, doc.filename)
        if not (text and text.strip()):
            return await self._fail(doc, "parse_failed")

        # 2. Chunk
        chunks = chunk_text(text)
        if not chunks:
            return await self._fail(doc, "parse_failed")

        # 3. Embed
        try:
            embeddings = await self.embedding.embed(chunks)
        except NotImplementedError:
            return await self._fail(doc, "embedding_unavailable")
        except Exception:
            return await self._fail(doc, "embedding_failed")

        # 4. Build one metadata dict per chunk
        if doc.published_at:
            published = doc.published_at.strftime("%Y.%m")
        else:
            published = ""
        metadatas = [
            {
                "doc_id": doc.id,
                "filename": doc.filename,
                "chunk_idx": idx,
                "published_at": published,
                "tenant_id": tenant_id,
            }
            for idx, _ in enumerate(chunks)
        ]

        # 5. Store in the vector DB
        await self.vectordb.upsert(
            tenant_id=tenant_id,
            doc_id=doc.id,
            chunks=chunks,
            embeddings=embeddings,
            metadatas=metadatas,
        )

        # 6. Update the Document row
        stored = len(chunks)
        doc.chunk_count = stored
        doc.status = "processed"
        await self.db.commit()
        return stored

    async def delete(self, tenant_id: str, doc_id: str) -> None:
        """Remove the document's chunks from the vector DB."""
        await self.vectordb.delete(tenant_id=tenant_id, doc_id=doc_id)
+94
View File
@@ -0,0 +1,94 @@
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import FAQ
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
from app.providers.base import SearchResult
FAQ_SIMILARITY_THRESHOLD = 0.85  # Tier A cut-off: passed as the vector-search threshold for FAQ matches
class FAQSearchService:
    """Tier A: FAQ lookup by embedding similarity.

    A registered FAQ is returned when the vector search, run with
    ``FAQ_SIMILARITY_THRESHOLD``, yields a hit that maps to an active FAQ of
    the tenant.
    """

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        vectordb_provider: VectorDBProvider,
        db: AsyncSession,
    ):
        self.embedding = embedding_provider
        self.vectordb = vectordb_provider
        self.db = db

    async def search(
        self, tenant_id: str, utterance: str
    ) -> Optional[tuple[FAQ, float]]:
        """Embed ``utterance``, search the vector DB and resolve the top FAQ.

        Returns:
            ``(faq, score)`` for the best hit, or ``None`` when embedding is
            not implemented, nothing passes the threshold, the hit carries no
            ``faq_id`` metadata, or the FAQ row is missing or inactive.
        """
        try:
            vectors = await self.embedding.embed([utterance])
        except NotImplementedError:
            return None

        hits = await self.vectordb.search(
            tenant_id=tenant_id,
            query_vec=vectors[0],
            top_k=1,
            threshold=FAQ_SIMILARITY_THRESHOLD,
        )
        if not hits:
            return None

        best: SearchResult = hits[0]
        matched_id = best.metadata.get("faq_id")
        if not matched_id:
            return None

        faq = await self._load_faq(tenant_id, matched_id)
        return None if faq is None else (faq, best.score)

    async def _load_faq(self, tenant_id: str, faq_id: str) -> Optional[FAQ]:
        """Load the tenant's active FAQ with this id, or ``None``."""
        query = select(FAQ).where(
            FAQ.tenant_id == tenant_id,
            FAQ.id == faq_id,
            FAQ.is_active.is_(True),
        )
        rows = await self.db.execute(query)
        return rows.scalar_one_or_none()

    async def increment_hit(self, faq_id: str) -> None:
        """Add one to the FAQ's ``hit_count`` and commit; no-op if not found."""
        faq = await self.db.get(FAQ, faq_id)
        if not faq:
            return
        faq.hit_count = (faq.hit_count or 0) + 1
        await self.db.commit()

    async def index_faq(self, tenant_id: str, faq: FAQ) -> None:
        """Index the FAQ (question and answer joined by a newline) in the vector DB.

        Silently does nothing when the embedding provider raises
        ``NotImplementedError``.
        """
        text = f"{faq.question}\n{faq.answer}"
        try:
            vectors = await self.embedding.embed([text])
        except NotImplementedError:
            return

        metadata = {"faq_id": faq.id, "question": faq.question}
        await self.vectordb.upsert(
            tenant_id=tenant_id,
            doc_id=faq.id,
            chunks=[text],
            embeddings=vectors,
            metadatas=[metadata],
        )
+28
View File
@@ -0,0 +1,28 @@
import json
from typing import Optional
class IdempotencyCache:
    """Redis-backed cache of responses keyed by tenant and request id.

    Results are stored as JSON under ``idempotency:{tenant_id}:{request_id}``
    with an expiry of ``ttl`` seconds.
    """

    def __init__(self, redis_client, ttl: int = 60):
        """Create the cache.

        Args:
            redis_client: Async client exposing ``get`` and ``setex``.
            ttl: Lifetime of each cached entry in seconds. Defaults to 60,
                the previously hard-coded value, so existing callers are
                unaffected.
        """
        self.redis = redis_client
        self.ttl = ttl

    def _key(self, tenant_id: str, request_id: str) -> str:
        """Build the Redis key for one tenant/request pair."""
        return f"idempotency:{tenant_id}:{request_id}"

    async def get(self, tenant_id: str, request_id: Optional[str]) -> Optional[dict]:
        """Return the cached result, or ``None`` on a miss or missing request id."""
        if request_id is None:
            return None
        raw = await self.redis.get(self._key(tenant_id, request_id))
        if raw is None:
            return None
        return json.loads(raw)

    async def set(self, tenant_id: str, request_id: Optional[str], result_dict: dict) -> None:
        """Cache ``result_dict`` for ``ttl`` seconds; no-op without a request id."""
        if request_id is None:
            return
        await self.redis.setex(
            self._key(tenant_id, request_id),
            self.ttl,
            json.dumps(result_dict),
        )
+34
View File
@@ -0,0 +1,34 @@
import hashlib
import re
# Masking patterns, applied in list order.
# The card-number pattern must run before the phone pattern: a phone match can
# start inside a card number (e.g. "012-3456-7890" inside
# "4012-3456-7890-1234"), which would leave the remaining digits unmasked.
_PATTERNS = [
    # Resident registration number: 6 digits, hyphen, a digit 1-4, 6 digits.
    # The lookarounds keep it from matching inside longer digit runs.
    (re.compile(r"(?<!\d)\d{6}-[1-4]\d{6}(?!\d)"), "######-*######"),
    # Card number: four groups of four digits, optionally separated by "-" or " ".
    (re.compile(r"\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}"), "****-****-****-****"),
    # Phone number in hyphenated form such as 010-1234-5678.
    (re.compile(r"0\d{1,2}-\d{3,4}-\d{4}"), "***-****-****"),
    # E-mail address.
    (re.compile(r"[\w._%+\-]+@[\w.\-]+\.[a-zA-Z]{2,}"), "***@***.***"),
]


def mask_text(text: str) -> str:
    """Return ``text`` with every personal-data pattern replaced by its mask."""
    for pattern, replacement in _PATTERNS:
        text = pattern.sub(replacement, text)
    return text


def hash_user_key(kakao_id: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of ``kakao_id``."""
    return hashlib.sha256(kakao_id.encode()).hexdigest()[:16]


def has_sensitive_data(text: str) -> bool:
    """Return ``True`` when ``text`` matches at least one personal-data pattern."""
    return any(pattern.search(text) for pattern, _ in _PATTERNS)
+95
View File
@@ -0,0 +1,95 @@
import json
from typing import Optional
from app.services.routing import RoutingResult
# Names of the counters kept in the per-tenant metrics hash.
METRIC_KEYS = [
    "total_count",
    "faq_hit_count",
    "rag_hit_count",
    "llm_hit_count",
    "fallback_count",
    "timeout_count",
    "response_ms_sum",
    "blocked_attempts",
]
# Suffix of the per-tenant sorted set holding response-time samples.
P95_SORTED_SET = "response_ms_p95_buf"
# Maximum number of samples kept in that sorted set.
P95_MAX_SIZE = 10000
# Maps RoutingResult.source to the counter incremented for it.
_SOURCE_TO_KEY = {
    "faq": "faq_hit_count",
    "rag": "rag_hit_count",
    "llm": "llm_hit_count",
    "fallback": "fallback_count",
}


def _as_text(value) -> str:
    """Return ``value`` as ``str``, decoding UTF-8 when it arrives as bytes."""
    if isinstance(value, (bytes, bytearray)):
        return value.decode("utf-8")
    return str(value)


class MetricsCollector:
    """Per-tenant response metrics stored in Redis.

    Counters live in the hash ``tenant:{id}:metrics``; response times are also
    sampled into the sorted set ``tenant:{id}:response_ms_p95_buf`` (score =
    elapsed milliseconds) for the p95 estimate.
    """

    def __init__(self, redis_client):
        self.redis = redis_client

    def _prefix(self, tenant_id: str) -> str:
        """Key of the tenant's counter hash."""
        return f"tenant:{tenant_id}:metrics"

    def _p95_key(self, tenant_id: str) -> str:
        """Key of the tenant's response-time sorted set."""
        return f"tenant:{tenant_id}:{P95_SORTED_SET}"

    async def record(self, tenant_id: str, result: "RoutingResult") -> None:
        """Record one routed response in a single pipeline round trip.

        Increments ``total_count``, the counter matching ``result.source`` (if
        known), ``timeout_count`` on timeout, and adds ``result.elapsed_ms`` to
        ``response_ms_sum`` and to the p95 sample set.
        """
        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        pipe = self.redis.pipeline()
        pipe.hincrby(prefix, "total_count", 1)

        source_key = _SOURCE_TO_KEY.get(result.source)
        if source_key:
            pipe.hincrby(prefix, source_key, 1)
        if result.is_timeout:
            pipe.hincrby(prefix, "timeout_count", 1)

        pipe.hincrby(prefix, "response_ms_sum", result.elapsed_ms)

        # p95 sorted set: score = elapsed_ms, member = a unique id.
        import time
        member = f"{time.time_ns()}"
        pipe.zadd(p95_key, {member: result.elapsed_ms})
        # NOTE(review): ranks are ordered by score, so once the set exceeds
        # P95_MAX_SIZE this trims the *smallest* elapsed_ms samples, not the
        # oldest ones. Over time that skews p95 upward; confirm whether a
        # recency-based buffer was intended.
        pipe.zremrangebyrank(p95_key, 0, -(P95_MAX_SIZE + 1))

        await pipe.execute()

    async def get_overview(self, tenant_id: str) -> dict:
        """Return counters, hit/timeout rates (percent), average and p95 latency.

        Hash field names are normalised to ``str`` so the lookups work whether
        or not the Redis client decodes responses (without decoding, field
        names arrive as ``bytes`` and every ``str`` lookup would read 0).

        Returns:
            ``{"counts": {...}, "rates": {...}, "avg_ms": int, "p95_ms": int}``.
        """
        prefix = self._prefix(tenant_id)
        p95_key = self._p95_key(tenant_id)

        raw = await self.redis.hgetall(prefix)
        counts = {_as_text(k): int(v) for k, v in raw.items()} if raw else {}

        total = counts.get("total_count", 0)
        avg_ms = counts.get("response_ms_sum", 0) // max(total, 1)

        # p95: element at the 95th-percentile rank of the score-ordered set.
        p95_ms = 0
        buf_size = await self.redis.zcard(p95_key)
        if buf_size > 0:
            p95_idx = max(0, int(buf_size * 0.95) - 1)
            p95_items = await self.redis.zrange(p95_key, p95_idx, p95_idx, withscores=True)
            if p95_items:
                p95_ms = int(p95_items[0][1])

        if total > 0:
            rates = {
                "faq_hit_rate": round(counts.get("faq_hit_count", 0) / total * 100, 2),
                "rag_hit_rate": round(counts.get("rag_hit_count", 0) / total * 100, 2),
                "fallback_rate": round(counts.get("fallback_count", 0) / total * 100, 2),
                "timeout_rate": round(counts.get("timeout_count", 0) / total * 100, 2),
            }
        else:
            rates = {"faq_hit_rate": 0.0, "rag_hit_rate": 0.0, "fallback_rate": 0.0, "timeout_rate": 0.0}

        return {
            "counts": counts,
            "rates": rates,
            "avg_ms": avg_ms,
            "p95_ms": p95_ms,
        }
+175
View File
@@ -0,0 +1,175 @@
"""
악성·반복 민원 제한 서비스.
Level 상태 자동 조치 해제
0 정상 없음 자동
1 주의 경고 메시지 자동
2 경고 30초 응답 지연 자동 24h
3 제한 10회/일 제한 자동 72h
4 임시 차단 24시간 차단 편집장 수동 확인 필요
5 영구 제한 차단 유지 편집장 수동 해제만
원칙: 자동 영구 차단 없음. Level 4+ 는 편집장 수동 확인.
"""
import asyncio
from datetime import datetime, timedelta, timezone
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.moderation import UserRestriction, RestrictionLevel
# Automatic expiry time per restriction level
LEVEL_EXPIRY = {
    RestrictionLevel.WARNING: timedelta(hours=24),
    RestrictionLevel.LIMITED: timedelta(hours=72),
    RestrictionLevel.SUSPENDED: timedelta(hours=24),  # provisional, until an editor confirms
}
# Daily request cap for level 3
# NOTE(review): not referenced by the code visible in this module; confirm where it is enforced.
DAILY_LIMIT = 10
# Warning message shown at level 1
WARNING_MESSAGE = "⚠️ 동일 문의가 반복되고 있습니다. 잠시 후 다시 시도해 주세요."
class ModerationResult:
    """Outcome of a moderation check for one user.

    Attributes are plain values set from the constructor arguments:
    ``allowed`` (whether the request may proceed), ``level`` (restriction
    level), ``message`` (optional text for the user) and ``delay_seconds``
    (response delay to apply).
    """

    def __init__(
        self,
        allowed: bool,
        level: int = 0,
        message: Optional[str] = None,
        delay_seconds: int = 0,
    ):
        self.allowed, self.level = allowed, level
        # Warning text (used at level 1)
        self.message = message
        # Response delay (used at level 2)
        self.delay_seconds = delay_seconds
class ModerationService:
    """Looks up, escalates and releases per-user restrictions for a tenant."""

    def __init__(self, db: AsyncSession):
        self.db = db

    async def check(self, tenant_id: str, user_key: str) -> ModerationResult:
        """Return the moderation outcome for ``user_key``.

        An expired restriction below ``SUSPENDED`` is reset and reported as
        level 0. An expired restriction at ``SUSPENDED`` or above is not reset
        here and is evaluated like an active one.
        """
        record = await self._get_restriction(tenant_id, user_key)
        if record is None:
            return ModerationResult(allowed=True, level=0)

        # Expiry handling
        expiry = record.expires_at
        if expiry and expiry < datetime.now(timezone.utc):
            if record.level < RestrictionLevel.SUSPENDED:
                await self._reset(record)
                return ModerationResult(allowed=True, level=0)

        level = record.level

        # Blocked, or suspended pending an editor's confirmation (level 4): deny.
        if level == RestrictionLevel.BLOCKED or level == RestrictionLevel.SUSPENDED:
            return ModerationResult(allowed=False, level=level)

        # Level 3: implemented as a plain denial, without a Redis counter
        # for the 10-per-day allowance.
        if level == RestrictionLevel.LIMITED:
            return ModerationResult(
                allowed=False,
                level=level,
                message="일일 문의 한도에 도달했습니다. 내일 다시 시도해 주세요.",
            )

        # Level 2: allowed, with a 30-second delay.
        if level == RestrictionLevel.WARNING:
            return ModerationResult(
                allowed=True,
                level=level,
                delay_seconds=30,
                message="요청이 지연되고 있습니다.",
            )

        # Level 1 (caution): allowed, with the warning message.
        if level == RestrictionLevel.NORMAL + 1:
            return ModerationResult(
                allowed=True,
                level=level,
                message=WARNING_MESSAGE,
            )

        return ModerationResult(allowed=True, level=level)

    async def escalate(
        self,
        tenant_id: str,
        user_key: str,
        reason: str = "자동 감지",
    ) -> int:
        """Raise the user's level by one step and return the new level.

        A missing restriction row is created at ``NORMAL`` first. Levels at or
        above ``SUSPENDED`` are never raised automatically; the current level
        is returned unchanged (and nothing is committed by this method).
        """
        record = await self._get_restriction(tenant_id, user_key)
        if record is None:
            record = UserRestriction(
                tenant_id=tenant_id,
                user_key=user_key,
                level=RestrictionLevel.NORMAL,
                auto_applied=True,
            )
            self.db.add(record)

        current = record.level
        if current >= RestrictionLevel.SUSPENDED:
            # Level 4+ requires manual handling.
            return current

        new_level = min(current + 1, RestrictionLevel.SUSPENDED)
        record.level = new_level
        record.reason = reason
        record.auto_applied = True

        # Expiry follows the per-level table; levels without an entry never expire.
        lifetime = LEVEL_EXPIRY.get(new_level)
        record.expires_at = (datetime.now(timezone.utc) + lifetime) if lifetime else None

        await self.db.commit()
        return new_level

    async def release(
        self,
        tenant_id: str,
        user_key: str,
        applied_by: str,
    ) -> None:
        """Manually clear the user's restriction, recording who did it."""
        record = await self._get_restriction(tenant_id, user_key)
        if not record:
            return
        await self._reset(record, applied_by=applied_by)

    async def _get_restriction(
        self, tenant_id: str, user_key: str
    ) -> Optional[UserRestriction]:
        """Fetch the restriction row for this tenant and user, or ``None``."""
        query = select(UserRestriction).where(
            UserRestriction.tenant_id == tenant_id,
            UserRestriction.user_key == user_key,
        )
        rows = await self.db.execute(query)
        return rows.scalar_one_or_none()

    async def _reset(self, restriction: UserRestriction, applied_by: Optional[str] = None) -> None:
        """Set the restriction back to ``NORMAL`` with no expiry and commit.

        ``auto_applied`` is true only when no ``applied_by`` was given.
        """
        restriction.level = RestrictionLevel.NORMAL
        restriction.expires_at = None
        restriction.auto_applied = applied_by is None
        if applied_by:
            restriction.applied_by = applied_by
        await self.db.commit()
@@ -0,0 +1,96 @@
"""
문서 파서 — 1차 정식 지원 형식:
TXT · MD · DOCX · 텍스트 PDF · HTML
"""
import io
from typing import Optional
def parse_txt(content: bytes, encoding: str = "utf-8") -> str:
    """Decode plain-text bytes with ``encoding``, replacing undecodable bytes."""
    decoded = content.decode(encoding, "replace")
    return decoded
def parse_md(content: bytes) -> str:
    """Decode Markdown bytes as UTF-8, replacing undecodable bytes.

    The Markdown markup itself is left untouched.
    """
    decoded = content.decode("utf-8", "replace")
    return decoded
def parse_html(content: bytes) -> str:
    """Return the visible text of an HTML document.

    ``script`` and ``style`` elements are removed before the remaining text is
    flattened into stripped, newline-separated strings.
    """
    from bs4 import BeautifulSoup

    tree = BeautifulSoup(content, "html.parser")
    # Drop script/style elements so their contents never reach the output.
    for node in tree(["script", "style"]):
        node.decompose()
    flattened = tree.get_text(separator="\n", strip=True)
    return flattened
def parse_docx(content: bytes) -> str:
    """Return the non-blank paragraphs of a DOCX file joined by newlines."""
    from docx import Document

    document = Document(io.BytesIO(content))
    kept = [para.text for para in document.paragraphs if para.text.strip()]
    return "\n".join(kept)
def parse_pdf(content: bytes) -> str:
    """Return the text of a PDF, one page per newline-separated segment.

    Pages for which no text is extracted are skipped. Any failure, including
    a missing ``pdfplumber`` or an unreadable file, yields an empty string.
    """
    try:
        import pdfplumber

        with pdfplumber.open(io.BytesIO(content)) as pdf:
            extracted = (page.extract_text() for page in pdf.pages)
            return "\n".join(page_text for page_text in extracted if page_text)
    except Exception:
        return ""
# Lower-case file extension (without the dot) -> parser taking raw bytes and
# returning text. extract_text() looks parsers up here.
PARSERS = {
    "txt": parse_txt,
    "md": parse_md,
    "html": parse_html,
    "htm": parse_html,
    "docx": parse_docx,
    "pdf": parse_pdf,
}
def extract_text(content: bytes, filename: str) -> Optional[str]:
    """Pick a parser by file extension and return the extracted text.

    The extension is whatever follows the last ``.`` in ``filename``,
    lower-cased.

    Returns:
        The parser's output, or ``None`` when the extension has no registered
        parser or the parser raises.
    """
    _, dot, suffix = filename.rpartition(".")
    ext = suffix.lower() if dot else ""

    parser = PARSERS.get(ext)
    if not parser:
        return None

    try:
        extracted = parser(content)
    except Exception:
        return None
    return extracted
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    """Split ``text`` into paragraph-aligned chunks of at most ``chunk_size`` characters.

    The text is split on newlines and blank lines are dropped. Paragraphs are
    packed into a chunk, joined by ``"\\n"``, until the next one would push
    the joined length past ``chunk_size``.

    Differences from a naive paragraph packer:
    - a paragraph longer than ``chunk_size`` is cut into ``chunk_size``-sized
      pieces instead of becoming one unbounded chunk;
    - the length check counts the newline joiners, so an emitted chunk never
      exceeds ``chunk_size``;
    - the overlap paragraph is carried into the next chunk only when it fits
      together with the incoming paragraph, so a chunk is never emitted
      oversized or fully duplicated inside its successor.

    Args:
        text: Text to split.
        chunk_size: Maximum chunk length in characters; must be positive.
        overlap: When greater than 0, the last paragraph of a finished chunk
            is repeated at the start of the next one (if it fits). Only its
            sign is used; the carry-over unit is a whole paragraph.

    Returns:
        The list of chunks. When the text contains no non-blank paragraph, a
        single-element list holding the first ``chunk_size`` characters of
        ``text`` is returned.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Paragraphs, with oversized ones hard-split so every piece fits a chunk.
    pieces: list[str] = []
    for raw in text.split("\n"):
        para = raw.strip()
        if not para:
            continue
        for start in range(0, len(para), chunk_size):
            pieces.append(para[start:start + chunk_size])

    chunks: list[str] = []
    current: list[str] = []
    current_len = 0  # length of "\n".join(current)

    for piece in pieces:
        piece_len = len(piece)
        joined_len = current_len + 1 + piece_len if current else piece_len

        if current and joined_len > chunk_size:
            chunks.append("\n".join(current))
            tail = current[-1]
            # Overlap: keep the last paragraph only if it fits with the new one.
            if overlap > 0 and len(tail) + 1 + piece_len <= chunk_size:
                current = [tail]
                joined_len = len(tail) + 1 + piece_len
            else:
                current = []
                joined_len = piece_len

        current.append(piece)
        current_len = joined_len

    if current:
        chunks.append("\n".join(current))

    return chunks if chunks else [text[:chunk_size]]
+109
View File
@@ -0,0 +1,109 @@
"""
Tier B — RAG 검색.
임베딩 유사도 ≥ 0.70 + 근거 문서 존재 → 문서 기반 템플릿 응답.
"""
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import Document
from app.providers.base import SearchResult
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
RAG_SIMILARITY_THRESHOLD = 0.70  # Tier B cut-off: passed as the vector-search threshold for document chunks
class RAGSearchResult:
    """One retrieved chunk together with its source document and score."""

    def __init__(self, chunk_text: str, doc: Document, score: float):
        self.chunk_text = chunk_text
        self.doc = doc
        self.score = score

    @property
    def doc_name(self) -> str:
        """File name of the source document."""
        return self.doc.filename

    @property
    def doc_date(self) -> str:
        """Publication month as ``YYYY.MM``, or ``""`` when not set."""
        published = self.doc.published_at
        return published.strftime("%Y.%m") if published else ""
class RAGSearchService:
    """Tier B: retrieve document chunks by embedding similarity."""

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        vectordb_provider: VectorDBProvider,
        db: AsyncSession,
    ):
        self.embedding = embedding_provider
        self.vectordb = vectordb_provider
        self.db = db

    async def search(
        self, tenant_id: str, utterance: str, top_k: int = 3
    ) -> Optional[list[RAGSearchResult]]:
        """Embed ``utterance`` and return matching chunks, best score first.

        The vector DB is queried with ``RAG_SIMILARITY_THRESHOLD``. Hits are
        reduced to the highest-scoring chunk per document, and only documents
        that load as active rows of the tenant are kept. The result is sorted
        by score in descending order, because callers treat the first element
        as the best match; the sort is stable, so an already ordered result
        set keeps its order.

        Returns:
            A non-empty list of results, or ``None`` when embedding is not
            implemented, nothing passes the threshold, or no hit maps to an
            active document.
        """
        try:
            vecs = await self.embedding.embed([utterance])
        except NotImplementedError:
            return None

        query_vec = vecs[0]
        results = await self.vectordb.search(
            tenant_id=tenant_id,
            query_vec=query_vec,
            top_k=top_k,
            threshold=RAG_SIMILARITY_THRESHOLD,
        )

        if not results:
            return None

        # De-duplicate by document: keep only the best-scoring chunk of each.
        seen_docs: dict[str, SearchResult] = {}
        for r in results:
            if "doc_id" in r.metadata:
                doc_id = r.metadata["doc_id"]
            else:
                # Fallback, evaluated only when needed: strip the trailing
                # "_<suffix>" from the hit's own id.
                doc_id = r.doc_id.rsplit("_", 1)[0]
            if doc_id not in seen_docs or r.score > seen_docs[doc_id].score:
                seen_docs[doc_id] = r

        # Highest score first, so index 0 really is the best hit.
        ranked = sorted(seen_docs.items(), key=lambda item: item[1].score, reverse=True)

        # Load Document rows (active ones only).
        rag_results = []
        for doc_id, sr in ranked:
            doc = await self._load_doc(tenant_id, doc_id)
            if doc:
                rag_results.append(RAGSearchResult(sr.text, doc, sr.score))

        return rag_results if rag_results else None

    async def _load_doc(self, tenant_id: str, doc_id: str) -> Optional[Document]:
        """Load the tenant's active document with this id, or ``None``."""
        result = await self.db.execute(
            select(Document).where(
                Document.tenant_id == tenant_id,
                Document.id == doc_id,
                Document.is_active.is_(True),
            )
        )
        return result.scalar_one_or_none()

    def build_answer(self, utterance: str, rag_results: list[RAGSearchResult]) -> str:
        """Build the template answer: up to two excerpts plus a citation line.

        Each excerpt is the first 300 characters of a chunk; excerpts are
        separated by ``---``. The citation names the first result's document
        and, when available, its date. ``utterance`` is not used.

        Args:
            utterance: The user's question (unused here).
            rag_results: Non-empty list of results, best first; an empty list
                raises ``IndexError``.
        """
        # Join the supporting excerpts (at most two).
        contexts = [r.chunk_text[:300] for r in rag_results[:2]]
        context_str = "\n---\n".join(contexts)

        best = rag_results[0]
        citation = f"📎 출처: {best.doc_name}"
        if best.doc_date:
            citation += f" ({best.doc_date})"

        return f"{context_str}\n\n{citation}"
+243
View File
@@ -0,0 +1,243 @@
import asyncio
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class RoutingResult:
    """Answer produced by the router plus metadata about how it was produced."""

    answer: str
    tier: str  # 'A'|'B'|'C'|'D'
    source: str  # 'faq'|'rag'|'llm'|'fallback'
    faq_id: Optional[str] = None
    doc_id: Optional[str] = None
    doc_name: Optional[str] = None
    doc_date: Optional[str] = None
    score: float = 0.0
    elapsed_ms: int = 0
    is_timeout: bool = False
    request_id: Optional[str] = None

    def to_dict(self) -> dict:
        """Return a plain-dict view of the result.

        ``doc_name`` and ``doc_date`` are not emitted as keys of their own;
        when ``doc_name`` is set they appear as the single entry of
        ``citations`` (with a missing date rendered as ``""``), otherwise
        ``citations`` is an empty list.
        """
        copied = (
            "answer",
            "tier",
            "source",
            "faq_id",
            "doc_id",
            "score",
            "elapsed_ms",
            "is_timeout",
            "request_id",
        )
        payload = {attr: getattr(self, attr) for attr in copied}
        payload["citations"] = (
            [{"doc": self.doc_name, "date": self.doc_date or ""}] if self.doc_name else []
        )
        return payload
class ResponseRouter:
    """Routes an utterance through the answer tiers under a hard time budget.

    Tiers are tried in the order A (FAQ), C (LLM rewrite over retrieved
    documents), B (document template answer); Tier D is the contact-info
    fallback used when nothing matches or the budget runs out.
    """

    TIMEOUT_MS = 4500  # 4.5 s: Kakao's 5 s limit minus a 500 ms margin

    def __init__(self, tenant_config: dict, providers: dict):
        self.tenant_config = tenant_config
        self.providers = providers

    async def route(
        self,
        tenant_id: str,
        utterance: str,
        user_key: str,
        request_id: Optional[str] = None,
        db=None,
    ) -> RoutingResult:
        """Answer ``utterance`` within ``TIMEOUT_MS``.

        Returns the first tier's result, or a Tier D result flagged
        ``is_timeout=True`` when the tiers do not finish in time.
        """
        import time

        started = time.monotonic()
        budget_seconds = self.TIMEOUT_MS / 1000
        attempt = self._try_tiers(tenant_id, utterance, user_key, request_id, db, started)
        try:
            return await asyncio.wait_for(attempt, timeout=budget_seconds)
        except asyncio.TimeoutError:
            elapsed = int((time.monotonic() - started) * 1000)
            return self._tier_d(tenant_id, elapsed, is_timeout=True, request_id=request_id)

    async def _try_tiers(
        self,
        tenant_id: str,
        utterance: str,
        user_key: str,
        request_id: Optional[str],
        db,
        start: float,
    ) -> RoutingResult:
        """Try tiers A, C and B in that order; fall back to Tier D.

        The first tier returning a result wins; its ``elapsed_ms`` (measured
        from ``start``) and ``request_id`` are filled in here. ``user_key`` is
        accepted but not used.
        """
        import time

        # A: FAQ similarity search.
        # C: LLM rewrite grounded in retrieved documents; tried before B so
        #    that an active LLM replaces the template answer.
        # B: document template answer (LLM inactive, or Tier C gave nothing).
        tiers = (self._try_tier_a, self._try_tier_c, self._try_tier_b)
        for try_tier in tiers:
            hit = await try_tier(tenant_id, utterance, db)
            if hit is not None:
                hit.elapsed_ms = int((time.monotonic() - start) * 1000)
                hit.request_id = request_id
                return hit

        elapsed = int((time.monotonic() - start) * 1000)
        return self._tier_d(tenant_id, elapsed, request_id=request_id)

    async def _try_tier_a(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
        """Tier A: answer from a registered FAQ found by similarity search.

        Returns ``None`` when the embedding provider, vector DB provider or
        ``db`` is missing, or when no FAQ matches.
        """
        embedder = self.providers.get("embedding")
        store = self.providers.get("vectordb")
        if embedder is None or store is None or db is None:
            return None

        from app.services.faq_search import FAQSearchService

        faq_service = FAQSearchService(embedder, store, db)
        found = await faq_service.search(tenant_id, utterance)
        if found is None:
            return None

        faq, score = found
        # Bump the hit counter; this is awaited, so it completes before the
        # answer is returned.
        await faq_service.increment_hit(faq.id)

        if faq.updated_at:
            citation_date = faq.updated_at.strftime("%Y.%m")
        else:
            citation_date = ""

        return RoutingResult(
            answer=faq.answer,
            tier="A",
            source="faq",
            faq_id=faq.id,
            doc_name=f"FAQ: {faq.question[:30]}",
            doc_date=citation_date,
            score=score,
        )

    async def _try_tier_c(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
        """Tier C: LLM answer restricted to retrieved document excerpts.

        Returns ``None`` when a provider or ``db`` is missing, the LLM is a
        ``NullLLMProvider``, retrieval finds nothing (the LLM is then not
        called at all), or the LLM returns ``None``.
        """
        llm = self.providers.get("llm")
        embedder = self.providers.get("embedding")
        store = self.providers.get("vectordb")
        if llm is None or embedder is None or store is None or db is None:
            return None

        # A NullLLMProvider means the LLM is disabled: skip this tier.
        from app.providers.llm import NullLLMProvider

        if isinstance(llm, NullLLMProvider):
            return None

        # Retrieval (same service as Tier B).
        from app.services.rag_search import RAGSearchService

        retriever = RAGSearchService(embedder, store, db)
        rag_results = await retriever.search(tenant_id, utterance)
        if not rag_results:
            return None  # no supporting text: do not call the LLM

        # Ask the LLM to answer from the excerpts only.
        context_chunks = [item.chunk_text for item in rag_results[:3]]
        context_str = "\n---\n".join(context_chunks)

        tenant_name = self.tenant_config.get("tenant_name", "")
        name_prefix = f"{tenant_name}" if tenant_name else ""
        system_prompt = (
            f"당신은 {name_prefix}AI 안내 도우미입니다.\n"
            f"반드시 아래 근거 문서의 내용만을 바탕으로 답변하세요.\n"
            f"근거 없는 내용은 절대 추측하지 마세요.\n\n"
            f"근거 문서:\n{context_str}"
        )

        answer = await llm.generate(
            system_prompt=system_prompt,
            user_message=utterance,
            context_chunks=context_chunks,
        )
        if answer is None:
            return None  # no LLM answer: the caller moves on to the next tier

        top = rag_results[0]
        return RoutingResult(
            answer=answer,
            tier="C",
            source="llm",
            doc_id=top.doc.id,
            doc_name=top.doc_name,
            doc_date=top.doc_date,
            score=top.score,
        )

    async def _try_tier_b(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
        """Tier B: template answer built from retrieved document excerpts.

        Returns ``None`` when a provider or ``db`` is missing, or when
        retrieval finds nothing.
        """
        embedder = self.providers.get("embedding")
        store = self.providers.get("vectordb")
        if embedder is None or store is None or db is None:
            return None

        from app.services.rag_search import RAGSearchService

        retriever = RAGSearchService(embedder, store, db)
        found = await retriever.search(tenant_id, utterance)
        if not found:
            return None

        top = found[0]
        return RoutingResult(
            answer=retriever.build_answer(utterance, found),
            tier="B",
            source="rag",
            doc_id=top.doc.id,
            doc_name=top.doc_name,
            doc_date=top.doc_date,
            score=top.score,
        )

    def _tier_d(
        self,
        tenant_id: str,
        elapsed_ms: int,
        is_timeout: bool = False,
        request_id: Optional[str] = None,
    ) -> RoutingResult:
        """Tier D: fallback message pointing the user to a contact.

        Built only from the in-memory ``tenant_config`` (no DB access). The
        wording depends on which of phone number, department and tenant name
        are configured. ``tenant_id`` is accepted but not used.
        """
        phone = self.tenant_config.get("phone_number", "")
        contact = self.tenant_config.get("fallback_dept", "")
        name = self.tenant_config.get("tenant_name", "")

        if phone and contact:
            answer = f"해당 문의는 {name} {contact}({phone})로 연락해 주세요."
        elif phone:
            if name:
                answer = f"해당 문의는 {name}({phone})로 연락해 주세요."
            else:
                answer = f"해당 문의는 {phone}로 연락해 주세요."
        elif name:
            answer = f"죄송합니다. {name}에 직접 문의해 주세요."
        else:
            answer = "죄송합니다. 해당 내용을 찾을 수 없습니다. 담당자에게 직접 문의해 주세요."

        return RoutingResult(
            answer=answer,
            tier="D",
            source="fallback",
            elapsed_ms=elapsed_ms,
            is_timeout=is_timeout,
            request_id=request_id,
        )