Initial commit: import from sinmb79/Gov-chat-bot

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
airkjw
2026-03-26 12:49:43 +09:00
commit a16c972dbb
104 changed files with 8063 additions and 0 deletions

10
backend/Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

38
backend/alembic.ini Normal file
View File

@@ -0,0 +1,38 @@
[alembic]
script_location = alembic
prepend_sys_path = .
version_path_separator = os
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

63
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,63 @@
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
# 모델 임포트 (테이블 메타데이터 등록)
from app.core.database import Base
import app.models.tenant
import app.models.admin
import app.models.knowledge
import app.models.complaint
import app.models.moderation
import app.models.audit
config = context.config
if config.config_file_name is not None:
fileConfig(config.config_file_name)
target_metadata = Base.metadata
def get_sync_url() -> str:
"""settings의 DATABASE_URL에서 asyncpg → psycopg2 변환."""
from app.core.config import settings
url = settings.DATABASE_URL
# asyncpg → psycopg2 (Alembic은 동기 연결 사용)
return url.replace("postgresql+asyncpg://", "postgresql+psycopg2://")
def run_migrations_offline() -> None:
url = get_sync_url()
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
cfg = config.get_section(config.config_ini_section) or {}
cfg["sqlalchemy.url"] = get_sync_url()
connectable = engine_from_config(
cfg,
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()

View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,165 @@
"""Initial schema: 10 tables
Revision ID: 0001
Revises:
Create Date: 2026-03-26
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
revision: str = "0001"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.create_table(
"tenants",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("name", sa.String(100), nullable=False),
sa.Column("slug", sa.String(50), unique=True, nullable=False),
sa.Column("is_active", sa.Boolean(), default=True),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"system_admins",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("email", sa.String(200), unique=True, nullable=False),
sa.Column("hashed_pw", sa.String(200), nullable=False),
sa.Column("is_active", sa.Boolean(), default=True),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"tenant_configs",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("key", sa.String(100), nullable=False),
sa.Column("value", sa.Text()),
sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"admin_users",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("email", sa.String(200), nullable=False),
sa.Column("hashed_pw", sa.String(200), nullable=False),
sa.Column("role", sa.String(20), nullable=False, default="viewer"),
sa.Column("is_active", sa.Boolean(), default=True),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
sa.UniqueConstraint("tenant_id", "email", name="uq_admin_users_tenant_email"),
)
op.create_table(
"faqs",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("category", sa.String(100)),
sa.Column("question", sa.Text(), nullable=False),
sa.Column("answer", sa.Text(), nullable=False),
sa.Column("keywords", sa.JSON()),
sa.Column("hit_count", sa.Integer(), default=0),
sa.Column("is_active", sa.Boolean(), default=True),
sa.Column("created_by", sa.String(36)),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"documents",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("filename", sa.String(255), nullable=False),
sa.Column("source_type", sa.String(50)),
sa.Column("source_url", sa.Text()),
sa.Column("published_at", sa.DateTime()),
sa.Column("effective_date", sa.DateTime()),
sa.Column("expires_at", sa.DateTime()),
sa.Column("is_active", sa.Boolean(), default=False),
sa.Column("approved_by", sa.String(36)),
sa.Column("approved_at", sa.DateTime()),
sa.Column("version", sa.Integer(), default=1),
sa.Column("supersedes_id", sa.String(36), sa.ForeignKey("documents.id")),
sa.Column("chunk_count", sa.Integer(), default=0),
sa.Column("status", sa.String(50), default="pending"),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"crawler_urls",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("url", sa.Text(), nullable=False),
sa.Column("url_type", sa.String(50)),
sa.Column("interval_hours", sa.Integer(), default=24),
sa.Column("is_active", sa.Boolean(), default=True),
sa.Column("last_crawled", sa.DateTime()),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"complaint_logs",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("user_key", sa.String(64)),
sa.Column("utterance_masked", sa.String(1000)),
sa.Column("utterance_vec_id", sa.String(36)),
sa.Column("channel", sa.String(20), default="kakao"),
sa.Column("request_id", sa.String(36)),
sa.Column("response_tier", sa.String(1)),
sa.Column("response_source", sa.String(20)),
sa.Column("faq_id", sa.String(36), sa.ForeignKey("faqs.id")),
sa.Column("doc_id", sa.String(36), sa.ForeignKey("documents.id")),
sa.Column("response_ms", sa.Integer()),
sa.Column("is_timeout", sa.Boolean(), default=False),
sa.Column("is_promoted", sa.Boolean(), default=False),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
op.create_table(
"user_restrictions",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("user_key", sa.String(64), nullable=False),
sa.Column("level", sa.Integer(), default=0),
sa.Column("reason", sa.String(500)),
sa.Column("applied_by", sa.String(36)),
sa.Column("auto_applied", sa.Boolean(), default=True),
sa.Column("expires_at", sa.DateTime()),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
sa.Column("updated_at", sa.DateTime(), server_default=sa.func.now()),
sa.UniqueConstraint("tenant_id", "user_key", name="uq_user_restrictions_tenant_user"),
)
op.create_table(
"audit_logs",
sa.Column("id", sa.String(36), primary_key=True),
sa.Column("tenant_id", sa.String(36), sa.ForeignKey("tenants.id"), nullable=False),
sa.Column("actor_id", sa.String(36), nullable=False),
sa.Column("actor_type", sa.String(20), nullable=False),
sa.Column("action", sa.String(50), nullable=False),
sa.Column("target_type", sa.String(50)),
sa.Column("target_id", sa.String(36)),
sa.Column("diff", sa.JSON()),
sa.Column("ip_address", sa.String(45)),
sa.Column("created_at", sa.DateTime(), server_default=sa.func.now()),
)
def downgrade() -> None:
op.drop_table("audit_logs")
op.drop_table("user_restrictions")
op.drop_table("complaint_logs")
op.drop_table("crawler_urls")
op.drop_table("documents")
op.drop_table("faqs")
op.drop_table("admin_users")
op.drop_table("tenant_configs")
op.drop_table("system_admins")
op.drop_table("tenants")

0
backend/app/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,65 @@
from pydantic_settings import BaseSettings
from typing import Optional
class Settings(BaseSettings):
# 보안
SECRET_KEY: str = "change-this-in-production-32chars"
JWT_EXPIRE_HOURS: int = 24
# 데이터베이스
DATABASE_URL: str = "postgresql+asyncpg://botuser:botpass@db:5432/smartbot"
REDIS_URL: str = "redis://redis:6379"
# 벡터DB
VECTOR_DB: str = "chromadb"
CHROMA_HOST: str = "chromadb"
CHROMA_PORT: int = 8000
# Provider 기본값 (테넌트별 TenantConfig로 오버라이드 가능)
LLM_PROVIDER: str = "none"
EMBEDDING_PROVIDER: str = "local"
EMBEDDING_MODEL: str = "jhgan/ko-sroberta-multitask"
# 개인정보
CHAT_LOG_RETENTION_DAYS: int = 30
# Idempotency
IDEMPOTENCY_TTL_SECONDS: int = 60
# Admin 초기값
ADMIN_DEFAULT_EMAIL: str = "admin@smartbot.kr"
ADMIN_DEFAULT_PASSWORD: str = "changeme123!"
# CORS
ALLOWED_ORIGINS: list[str] = ["http://localhost"]
class Config:
env_file = ".env"
case_sensitive = True
settings = Settings()
# 설정 우선순위:
# 1 (최고) TenantConfig DB 값 → 해당 테넌트에만 적용
# 2 환경변수 (.env) → 서버 전체 기본값
# 3 (최저) 코드 하드코딩 → 폴백
async def get_tenant_config(tenant_id: str, db) -> dict:
"""TenantConfig에서 테넌트별 설정 로드. 없으면 전역 settings 사용."""
from app.models.tenant import TenantConfig
from sqlalchemy import select
result = await db.execute(
select(TenantConfig).where(TenantConfig.tenant_id == tenant_id)
)
configs = result.scalars().all()
base = settings.model_dump()
if not configs:
return base
overrides = {cfg.key: cfg.value for cfg in configs}
return {**base, **overrides}

View File

@@ -0,0 +1,28 @@
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import DeclarativeBase, sessionmaker
# Phase 0B에서 settings로 교체 예정 — 현재는 하드코딩 허용
DATABASE_URL = "sqlite+aiosqlite:///:memory:"
class Base(DeclarativeBase):
pass
engine = create_async_engine(DATABASE_URL, echo=False)
AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
async def get_db():
async with AsyncSessionLocal() as session:
yield session
async def init_db():
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async def startup_hook():
"""Phase 0B startup 훅 — DB 초기화 + 추가 작업 예정."""
await init_db()

52
backend/app/core/deps.py Normal file
View File

@@ -0,0 +1,52 @@
"""
FastAPI 공통 의존성.
JWT 인증, 역할 검증.
"""
from typing import Optional
from fastapi import Depends, HTTPException, Header, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.core.database import get_db
from app.core.security import decode_token
from app.models.admin import AdminUser, AdminRole, SystemAdmin
async def get_current_admin(
authorization: Optional[str] = Header(default=None),
db: AsyncSession = Depends(get_db),
) -> AdminUser:
"""JWT Bearer 토큰에서 AdminUser 추출."""
if not authorization or not authorization.startswith("Bearer "):
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Not authenticated")
token = authorization.removeprefix("Bearer ").strip()
payload = decode_token(token)
if not payload:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token")
if payload.get("type") == "system_admin":
# SystemAdmin은 별도 처리 — 여기선 tenant-scoped API 접근 거부
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Use system admin endpoints")
user_id = payload.get("sub")
result = await db.execute(select(AdminUser).where(AdminUser.id == user_id, AdminUser.is_active.is_(True)))
user = result.scalar_one_or_none()
if not user:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="User not found")
return user
def require_role(*roles: AdminRole):
"""역할 검증 의존성 팩토리."""
async def _check(user: AdminUser = Depends(get_current_admin)) -> AdminUser:
if user.role not in roles:
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Insufficient permissions")
return user
return _check
# 편의 의존성
require_editor = require_role(AdminRole.admin, AdminRole.editor)
require_admin = require_role(AdminRole.admin)

View File

@@ -0,0 +1,49 @@
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from sqlalchemy import select
EXEMPT_PATHS = {"/health", "/ready", "/engine/query", "/api/docs", "/openapi.json", "/redoc"}
EXEMPT_PREFIXES = ("/skill/", "/api/admin/") # 채널 API + 관리자 API (JWT로 tenant 검증)
class TenantMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
path = request.url.path
if path in EXEMPT_PATHS or any(path.startswith(p) for p in EXEMPT_PREFIXES):
request.state.tenant_id = None
return await call_next(request)
tenant_id = await self._resolve_tenant(request)
if not tenant_id:
return JSONResponse({"error": "tenant_required"}, status_code=400)
request.state.tenant_id = tenant_id
return await call_next(request)
async def _resolve_tenant(self, request: Request):
# 현재는 X-Tenant-Slug 헤더만 처리 (Phase 0B에서 JWT 추가)
slug = request.headers.get("X-Tenant-Slug")
if slug:
return slug
return None
def tenanted_query(query, model, tenant_id):
"""
주의: model 파라미터가 두 번째 인자다. (v2.0 오류 수정)
tenant_id가 None 또는 빈 문자열이면 RuntimeError 발생.
사용 예: tenanted_query(select(FAQ), FAQ, request.state.tenant_id)
"""
if not tenant_id:
raise RuntimeError(
f"tenant_id is required for {model.__tablename__} queries. "
"Check TenantMiddleware is applied."
)
return query.where(model.tenant_id == tenant_id)
def system_query(query):
"""SystemAdmin 전용 쿼리. tenant 필터 없음. 일반 서비스에서 호출 금지."""
return query

View File

@@ -0,0 +1,44 @@
from datetime import datetime, timedelta, timezone
from typing import Optional
import bcrypt
import jwt
from app.core.config import settings
def hash_password(password: str) -> str:
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
def verify_password(plain: str, hashed: str) -> bool:
return bcrypt.checkpw(plain.encode(), hashed.encode())
def create_admin_token(user_id: str, tenant_id: str, role: str) -> str:
payload = {
"sub": user_id,
"tenant_id": tenant_id,
"role": role,
"type": "admin_user",
"exp": datetime.now(timezone.utc) + timedelta(hours=settings.JWT_EXPIRE_HOURS),
}
return jwt.encode(payload, settings.SECRET_KEY, algorithm="HS256")
def create_system_token(sys_admin_id: str) -> str:
payload = {
"sub": sys_admin_id,
"tenant_id": None,
"role": "system",
"type": "system_admin",
"exp": datetime.now(timezone.utc) + timedelta(hours=settings.JWT_EXPIRE_HOURS),
}
return jwt.encode(payload, settings.SECRET_KEY, algorithm="HS256")
def decode_token(token: str) -> Optional[dict]:
try:
return jwt.decode(token, settings.SECRET_KEY, algorithms=["HS256"])
except Exception:
return None

70
backend/app/main.py Normal file
View File

@@ -0,0 +1,70 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.core.config import settings
from app.core.database import startup_hook
from app.core.middleware import TenantMiddleware
from app.providers import get_embedding_provider, get_llm_provider, get_vectordb_provider
from app.routers import health
from app.routers import engine, skill
from app.routers import admin_docs, admin_crawler
from app.routers import admin_faq, admin_moderation, admin_complaints
from app.routers import admin_auth, admin_metrics
@asynccontextmanager
async def lifespan(app: FastAPI):
await startup_hook()
# 전역 설정 로드
cfg = settings.model_dump()
# Provider 초기화
embedding = get_embedding_provider(cfg)
llm = get_llm_provider(cfg)
vectordb = get_vectordb_provider(cfg)
# 임베딩 워밍업 (시작 시 모델 로드)
try:
await embedding.warmup()
except Exception:
pass # 실패해도 서버는 기동 — /ready에서 상태 노출
app.state.providers = {
"embedding": embedding,
"llm": llm,
"vectordb": vectordb,
}
app.state.tenant_configs = {} # 캐시: {tenant_slug: config_dict}
app.state.redis = None # Phase 1: Redis 연결은 선택적
# Redis 연결 시도
try:
import redis.asyncio as aioredis
r = aioredis.from_url(settings.REDIS_URL, socket_connect_timeout=2)
await r.ping()
app.state.redis = r
except Exception:
pass # Redis 없이도 동작 (Idempotency 비활성)
yield
# 종료 시 Redis 연결 해제
if app.state.redis:
await app.state.redis.aclose()
app = FastAPI(title="SmartBot KR", version="1.0.0", lifespan=lifespan)
app.add_middleware(TenantMiddleware)
app.include_router(health.router)
app.include_router(engine.router)
app.include_router(skill.router)
app.include_router(admin_docs.router)
app.include_router(admin_crawler.router)
app.include_router(admin_auth.router)
app.include_router(admin_faq.router)
app.include_router(admin_moderation.router)
app.include_router(admin_complaints.router)
app.include_router(admin_metrics.router)

View File

@@ -0,0 +1,42 @@
from app.providers.llm import LLMProvider, NullLLMProvider
from app.providers.embedding import EmbeddingProvider, NotImplementedEmbeddingProvider
from app.providers.vectordb import VectorDBProvider
# 워밍업 상태 전역 플래그
_embedding_warmed_up = False
def get_llm_provider(config: dict) -> LLMProvider:
provider = config.get("LLM_PROVIDER", "none")
if provider == "none":
return NullLLMProvider()
if provider == "anthropic":
from app.providers.llm_anthropic import AnthropicLLMProvider
return AnthropicLLMProvider(
api_key=config.get("ANTHROPIC_API_KEY", ""),
model=config.get("LLM_MODEL", "claude-haiku-4-5-20251001"),
)
if provider == "openai":
from app.providers.llm_anthropic import OpenAILLMProvider
return OpenAILLMProvider(
api_key=config.get("OPENAI_API_KEY", ""),
model=config.get("LLM_MODEL", "gpt-4o-mini"),
)
raise ValueError(f"Unknown LLM provider: {provider}")
def get_embedding_provider(config: dict) -> EmbeddingProvider:
provider = config.get("EMBEDDING_PROVIDER", "none")
if provider == "local":
from app.providers.local_embedding import LocalEmbeddingProvider
model = config.get("EMBEDDING_MODEL", "jhgan/ko-sroberta-multitask")
return LocalEmbeddingProvider(model_name=model)
return NotImplementedEmbeddingProvider()
def get_vectordb_provider(config: dict) -> VectorDBProvider:
from app.providers.chroma import ChromaVectorDBProvider
return ChromaVectorDBProvider(
host=config.get("CHROMA_HOST", "chromadb"),
port=int(config.get("CHROMA_PORT", 8000)),
)

View File

@@ -0,0 +1,10 @@
from dataclasses import dataclass, field
from typing import Any
@dataclass
class SearchResult:
text: str
doc_id: str
score: float
metadata: dict = field(default_factory=dict)

View File

@@ -0,0 +1,91 @@
from typing import Optional
from app.providers.base import SearchResult
from app.providers.vectordb import VectorDBProvider
class ChromaVectorDBProvider(VectorDBProvider):
"""
ChromaDB 기반 벡터 검색.
컬렉션명 = tenant_{tenant_id} (테넌트 격리)
"""
def __init__(self, host: str = "chromadb", port: int = 8000):
self.host = host
self.port = port
self._client = None
def _get_client(self):
if self._client is None:
import chromadb
self._client = chromadb.HttpClient(host=self.host, port=self.port)
return self._client
def _collection_name(self, tenant_id: str) -> str:
return f"tenant_{tenant_id}"
async def upsert(
self,
tenant_id: str,
doc_id: str,
chunks: list[str],
embeddings: list[list[float]],
metadatas: list[dict],
) -> int:
client = self._get_client()
collection = client.get_or_create_collection(self._collection_name(tenant_id))
ids = [f"{doc_id}_{i}" for i in range(len(chunks))]
collection.upsert(ids=ids, documents=chunks, embeddings=embeddings, metadatas=metadatas)
return len(chunks)
async def search(
self,
tenant_id: str,
query_vec: list[float],
top_k: int = 3,
threshold: float = 0.70,
) -> list[SearchResult]:
client = self._get_client()
collection_name = self._collection_name(tenant_id)
try:
collection = client.get_collection(collection_name)
except Exception:
return []
results = collection.query(
query_embeddings=[query_vec],
n_results=min(top_k, collection.count()),
include=["documents", "metadatas", "distances"],
)
search_results = []
if not results["ids"] or not results["ids"][0]:
return []
for i, doc_id in enumerate(results["ids"][0]):
# Chroma distances: 1 - cosine_similarity (낮을수록 유사)
distance = results["distances"][0][i]
score = 1.0 - distance # cosine similarity로 변환
if score >= threshold:
search_results.append(
SearchResult(
text=results["documents"][0][i],
doc_id=doc_id,
score=score,
metadata=results["metadatas"][0][i] or {},
)
)
return search_results
async def delete(self, tenant_id: str, doc_id: str) -> None:
client = self._get_client()
collection_name = self._collection_name(tenant_id)
try:
collection = client.get_collection(collection_name)
# doc_id로 시작하는 모든 청크 삭제
all_ids = collection.get()["ids"]
ids_to_delete = [id_ for id_ in all_ids if id_.startswith(f"{doc_id}_")]
if ids_to_delete:
collection.delete(ids=ids_to_delete)
except Exception:
pass

View File

@@ -0,0 +1,30 @@
from abc import ABC, abstractmethod
class EmbeddingProvider(ABC):
@abstractmethod
async def embed(self, texts: list[str]) -> list[list[float]]:
...
@abstractmethod
async def warmup(self) -> None:
...
@property
@abstractmethod
def dimension(self) -> int:
...
class NotImplementedEmbeddingProvider(EmbeddingProvider):
"""Phase 1에서 LocalEmbeddingProvider로 교체 예정"""
async def embed(self, texts: list[str]) -> list:
raise NotImplementedError("Embedding provider not configured. Set EMBEDDING_PROVIDER.")
async def warmup(self) -> None:
pass # 예외 없이 통과
@property
def dimension(self) -> int:
return 768

View File

@@ -0,0 +1,28 @@
from abc import ABC, abstractmethod
from typing import Optional
class LLMProvider(ABC):
@abstractmethod
async def generate(
self,
system_prompt: str,
user_message: str,
context_chunks: list,
max_tokens: int = 512,
) -> Optional[str]:
"""실패 시 None 반환. 예외 raise 금지."""
...
class NullLLMProvider(LLMProvider):
"""LLM_PROVIDER=none 기본값"""
async def generate(
self,
system_prompt: str = "",
user_message: str = "",
context_chunks: list = None,
max_tokens: int = 512,
) -> None:
return None

View File

@@ -0,0 +1,82 @@
"""
Anthropic Claude LLM Provider.
근거(context_chunks)가 있을 때만 호출.
할루시네이션 방지: 근거 없으면 None 반환.
"""
from typing import Optional
from app.providers.llm import LLMProvider
SYSTEM_PROMPT_TEMPLATE = """당신은 {tenant_name}AI 안내 도우미입니다.
반드시 아래 근거 문서에 있는 내용만을 바탕으로 답변하세요.
근거 없는 내용은 절대 추측하거나 생성하지 마세요.
근거 문서:
{context}
규칙:
1. 근거 문서에 없는 내용은 "담당자에게 문의해 주세요"로 안내
2. 답변은 간결하고 명확하게 (3문장 이내)
3. 전문 용어는 쉬운 말로 바꿔 설명
"""
class AnthropicLLMProvider(LLMProvider):
def __init__(self, api_key: str, model: str = "claude-haiku-4-5-20251001"):
self.api_key = api_key
self.model = model
async def generate(
self,
system_prompt: str,
user_message: str,
context_chunks: list,
max_tokens: int = 512,
) -> Optional[str]:
"""근거 없으면 None 반환. 예외 발생 시 None 반환."""
if not context_chunks:
return None # 할루시네이션 방지 — 근거 없으면 LLM 미호출
try:
import anthropic
client = anthropic.AsyncAnthropic(api_key=self.api_key)
message = await client.messages.create(
model=self.model,
max_tokens=max_tokens,
system=system_prompt,
messages=[{"role": "user", "content": user_message}],
)
return message.content[0].text if message.content else None
except Exception:
return None # 실패 시 None — 호출자가 Tier D로 폴백
class OpenAILLMProvider(LLMProvider):
def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
self.api_key = api_key
self.model = model
async def generate(
self,
system_prompt: str,
user_message: str,
context_chunks: list,
max_tokens: int = 512,
) -> Optional[str]:
if not context_chunks:
return None
try:
import openai
client = openai.AsyncOpenAI(api_key=self.api_key)
response = await client.chat.completions.create(
model=self.model,
max_tokens=max_tokens,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
)
return response.choices[0].message.content
except Exception:
return None

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from app.providers.embedding import EmbeddingProvider
import app.providers as providers_module
class LocalEmbeddingProvider(EmbeddingProvider):
"""
jhgan/ko-sroberta-multitask 기반 로컬 임베딩.
sentence-transformers 패키지 필요.
"""
def __init__(self, model_name: str = "jhgan/ko-sroberta-multitask"):
self.model_name = model_name
self._model = None
async def warmup(self) -> None:
"""모델 로드. 최초 1회 실행."""
from sentence_transformers import SentenceTransformer
self._model = SentenceTransformer(self.model_name)
providers_module._embedding_warmed_up = True
async def embed(self, texts: list[str]) -> list[list[float]]:
if self._model is None:
await self.warmup()
embeddings = self._model.encode(texts, convert_to_numpy=True)
return embeddings.tolist()
@property
def dimension(self) -> int:
return 768

View File

@@ -0,0 +1,30 @@
from abc import ABC, abstractmethod
from app.providers.base import SearchResult
class VectorDBProvider(ABC):
@abstractmethod
async def upsert(
self,
tenant_id: str,
doc_id: str,
chunks: list[str],
embeddings: list[list[float]],
metadatas: list[dict],
) -> int:
...
@abstractmethod
async def search(
self,
tenant_id: str,
query_vec: list[float],
top_k: int = 3,
threshold: float = 0.70,
) -> list[SearchResult]:
...
@abstractmethod
async def delete(self, tenant_id: str, doc_id: str) -> None:
...

View File

View File

@@ -0,0 +1,47 @@
"""
관리자 인증 API.
POST /api/admin/auth/login — 로그인 (JWT 발급)
"""
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.security import verify_password, create_admin_token
from app.models.admin import AdminUser
router = APIRouter(prefix="/api/admin/auth", tags=["admin-auth"])
class LoginRequest(BaseModel):
tenant_id: str
email: str
password: str
class LoginResponse(BaseModel):
access_token: str
token_type: str = "bearer"
role: str
@router.post("/login", response_model=LoginResponse)
async def login(body: LoginRequest, db: AsyncSession = Depends(get_db)):
result = await db.execute(
select(AdminUser).where(
AdminUser.tenant_id == body.tenant_id,
AdminUser.email == body.email,
AdminUser.is_active == True,
)
)
user = result.scalar_one_or_none()
if user is None or not verify_password(body.password, user.hashed_pw):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="이메일 또는 비밀번호가 올바르지 않습니다.",
)
token = create_admin_token(user.id, user.tenant_id, user.role.value)
return LoginResponse(access_token=token, role=user.role.value)

View File

@@ -0,0 +1,72 @@
"""
민원 이력 조회 API — viewer 이상.
GET /api/admin/complaints — 민원 이력 목록 (마스킹 상태로 노출)
"""
from typing import Optional
from datetime import datetime
from fastapi import APIRouter, Depends, Query
from pydantic import BaseModel
from sqlalchemy import select, desc
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import get_current_admin
from app.models.admin import AdminUser
from app.models.complaint import ComplaintLog
router = APIRouter(prefix="/api/admin/complaints", tags=["admin-complaints"])
class ComplaintOut(BaseModel):
id: str
user_key: str # 해시값만 노출
utterance_masked: Optional[str] = None # 마스킹된 발화
channel: Optional[str] = None
response_tier: Optional[str] = None
response_source: Optional[str] = None
response_ms: Optional[int] = None
is_timeout: bool
created_at: Optional[datetime] = None
class Config:
from_attributes = True
@router.get("", response_model=list[ComplaintOut])
async def list_complaints(
limit: int = Query(default=50, le=200),
tier: Optional[str] = Query(default=None, description="A|B|C|D"),
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(get_current_admin),
):
"""
민원 이력 조회.
- utterance는 마스킹 상태로만 노출 (관리자도 원문 열람 불가)
- user_key는 해시값만 노출
"""
query = (
select(ComplaintLog)
.where(ComplaintLog.tenant_id == current_user.tenant_id)
.order_by(desc(ComplaintLog.created_at))
.limit(limit)
)
if tier:
query = query.where(ComplaintLog.response_tier == tier)
result = await db.execute(query)
logs = result.scalars().all()
return [
ComplaintOut(
id=log.id,
user_key=log.user_key or "",
utterance_masked=log.utterance_masked,
channel=log.channel,
response_tier=log.response_tier,
response_source=log.response_source,
response_ms=log.response_ms,
is_timeout=bool(log.is_timeout),
created_at=log.created_at,
)
for log in logs
]

View File

@@ -0,0 +1,165 @@
"""
크롤러 관리 API.
POST /api/admin/crawler/urls — URL 등록
GET /api/admin/crawler/urls — URL 목록
POST /api/admin/crawler/run/{url_id} — 수동 크롤링 실행
DELETE /api/admin/crawler/urls/{id} — URL 삭제
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import require_editor
from app.models.admin import AdminUser
from app.models.knowledge import CrawlerURL, Document
from app.services.crawler import CrawlerService
from app.services.document_processor import DocumentProcessor
from app.services.audit import log_action
router = APIRouter(prefix="/api/admin/crawler", tags=["admin-crawler"])
class CrawlerURLCreate(BaseModel):
url: str
url_type: str = "page"
interval_hours: int = 24
class CrawlerURLOut(BaseModel):
id: str
url: str
url_type: str
interval_hours: int
is_active: bool
last_crawled: Optional[str] = None
class Config:
from_attributes = True
@router.post("/urls", status_code=status.HTTP_201_CREATED)
async def register_url(
body: CrawlerURLCreate,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""크롤러 URL 등록."""
crawler_url = CrawlerURL(
tenant_id=current_user.tenant_id,
url=body.url,
url_type=body.url_type,
interval_hours=body.interval_hours,
is_active=True,
)
db.add(crawler_url)
await db.commit()
await db.refresh(crawler_url)
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="crawler.approve",
target_type="crawler_url",
target_id=crawler_url.id,
diff={"url": body.url},
)
return {"id": crawler_url.id, "url": crawler_url.url}
@router.get("/urls", response_model=list[CrawlerURLOut])
async def list_urls(
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
result = await db.execute(
select(CrawlerURL).where(CrawlerURL.tenant_id == current_user.tenant_id)
)
return result.scalars().all()
@router.post("/run/{url_id}")
async def run_crawl(
url_id: str,
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""수동 크롤링 실행 → 텍스트 추출 → 문서로 저장."""
tenant_id = current_user.tenant_id
result = await db.execute(
select(CrawlerURL).where(CrawlerURL.id == url_id, CrawlerURL.tenant_id == tenant_id)
)
crawler_url = result.scalar_one_or_none()
if not crawler_url:
raise HTTPException(status_code=404, detail="Crawler URL not found")
service = CrawlerService(db)
text = await service.run(crawler_url, tenant_id)
if not text:
raise HTTPException(status_code=422, detail="Failed to crawl or robots.txt disallowed")
# 크롤링 결과를 문서로 저장
from urllib.parse import urlparse
parsed = urlparse(crawler_url.url)
filename = parsed.netloc + parsed.path.replace("/", "_") + ".txt"
doc = Document(
tenant_id=tenant_id,
filename=filename,
source_type="crawler",
source_url=crawler_url.url,
is_active=False, # 편집장 검토 후 승인
status="pending",
)
db.add(doc)
await db.flush()
providers = getattr(request.app.state, "providers", {})
processor = DocumentProcessor(
embedding_provider=providers.get("embedding"),
vectordb_provider=providers.get("vectordb"),
db=db,
)
chunk_count = await processor.process(tenant_id, doc, text.encode("utf-8"))
return {
"doc_id": doc.id,
"url": crawler_url.url,
"chunk_count": chunk_count,
"status": doc.status,
}
@router.delete("/urls/{url_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_url(
url_id: str,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
result = await db.execute(
select(CrawlerURL).where(CrawlerURL.id == url_id, CrawlerURL.tenant_id == current_user.tenant_id)
)
crawler_url = result.scalar_one_or_none()
if not crawler_url:
raise HTTPException(status_code=404, detail="Crawler URL not found")
await db.delete(crawler_url)
await db.commit()
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="crawler.reject",
target_type="crawler_url",
target_id=url_id,
)

View File

@@ -0,0 +1,190 @@
"""
문서 관리 API — 편집장(editor) 이상 접근.
POST /api/admin/documents/upload — 문서 업로드
POST /api/admin/documents/{id}/approve — 문서 승인 (is_active=True)
GET /api/admin/documents — 문서 목록
DELETE /api/admin/documents/{id} — 문서 삭제
"""
from typing import Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, Form, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import require_editor, require_admin, get_current_admin
from app.models.admin import AdminUser
from app.models.knowledge import Document
from app.services.document_processor import DocumentProcessor
from app.services.audit import log_action
router = APIRouter(prefix="/api/admin/documents", tags=["admin-documents"])
class DocumentOut(BaseModel):
id: str
filename: str
status: str
is_active: bool
chunk_count: int
version: int
published_at: Optional[datetime] = None
created_at: Optional[datetime] = None
class Config:
from_attributes = True
@router.post("/upload", status_code=status.HTTP_201_CREATED)
async def upload_document(
request: Request,
file: UploadFile = File(...),
published_at: Optional[str] = Form(default=None),
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""문서 업로드 → 파싱·임베딩 → VectorDB 저장. is_active=False (승인 대기)."""
tenant_id = current_user.tenant_id
content = await file.read()
# 지원 형식 검사
ext = file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else ""
SUPPORTED = {"txt", "md", "html", "htm", "docx", "pdf"}
if ext not in SUPPORTED:
raise HTTPException(status_code=400, detail=f"Unsupported file type: .{ext}")
# Document 레코드 생성
published = None
if published_at:
try:
published = datetime.fromisoformat(published_at)
except ValueError:
pass
doc = Document(
tenant_id=tenant_id,
filename=file.filename,
source_type="upload",
is_active=False, # 편집장 승인 전
status="pending",
published_at=published,
approved_by=None,
)
db.add(doc)
await db.flush() # id 확보
# 문서 처리
providers = getattr(request.app.state, "providers", {})
processor = DocumentProcessor(
embedding_provider=providers.get("embedding"),
vectordb_provider=providers.get("vectordb"),
db=db,
)
chunk_count = await processor.process(tenant_id, doc, content)
# 감사 로그
await log_action(
db=db,
tenant_id=tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="doc.upload",
target_type="document",
target_id=doc.id,
diff={"filename": file.filename, "chunk_count": chunk_count},
)
return {"id": doc.id, "filename": doc.filename, "status": doc.status, "chunk_count": chunk_count}
@router.post("/{doc_id}/approve")
async def approve_document(
doc_id: str,
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""문서 승인 → is_active=True."""
tenant_id = current_user.tenant_id
result = await db.execute(
select(Document).where(Document.id == doc_id, Document.tenant_id == tenant_id)
)
doc = result.scalar_one_or_none()
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
if doc.status not in ("processed", "embedding_unavailable"):
raise HTTPException(status_code=400, detail=f"Cannot approve document with status: {doc.status}")
doc.is_active = True
doc.approved_by = current_user.id
doc.approved_at = datetime.utcnow()
await db.commit()
await log_action(
db=db,
tenant_id=tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="doc.approve",
target_type="document",
target_id=doc.id,
)
return {"id": doc.id, "is_active": True}
@router.get("", response_model=list[DocumentOut])
async def list_documents(
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(get_current_admin),
):
"""문서 목록 조회."""
result = await db.execute(
select(Document)
.where(Document.tenant_id == current_user.tenant_id)
.order_by(Document.created_at.desc())
)
return result.scalars().all()
@router.delete("/{doc_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_document(
doc_id: str,
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""문서 삭제 (DB + VectorDB)."""
tenant_id = current_user.tenant_id
result = await db.execute(
select(Document).where(Document.id == doc_id, Document.tenant_id == tenant_id)
)
doc = result.scalar_one_or_none()
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
# VectorDB 청크 삭제
providers = getattr(request.app.state, "providers", {})
vectordb = providers.get("vectordb")
if vectordb:
processor = DocumentProcessor(
embedding_provider=providers.get("embedding"),
vectordb_provider=vectordb,
db=db,
)
await processor.delete(tenant_id, doc_id)
await db.delete(doc)
await db.commit()
await log_action(
db=db,
tenant_id=tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="doc.delete",
target_type="document",
target_id=doc_id,
)

View File

@@ -0,0 +1,189 @@
"""
FAQ CRUD API — 편집장(editor) 이상.
POST /api/admin/faqs — FAQ 생성
GET /api/admin/faqs — FAQ 목록
PUT /api/admin/faqs/{id} — FAQ 수정
DELETE /api/admin/faqs/{id} — FAQ 삭제
POST /api/admin/faqs/{id}/index — FAQ 벡터 색인
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import require_editor, get_current_admin
from app.models.admin import AdminUser
from app.models.knowledge import FAQ
from app.services.audit import log_action
from app.services.faq_search import FAQSearchService
router = APIRouter(prefix="/api/admin/faqs", tags=["admin-faq"])
class FAQCreate(BaseModel):
category: Optional[str] = None
question: str
answer: str
keywords: Optional[list[str]] = None
class FAQUpdate(BaseModel):
category: Optional[str] = None
question: Optional[str] = None
answer: Optional[str] = None
keywords: Optional[list[str]] = None
is_active: Optional[bool] = None
class FAQOut(BaseModel):
id: str
category: Optional[str] = None
question: str
answer: str
keywords: Optional[list] = None
hit_count: int
is_active: bool
class Config:
from_attributes = True
@router.post("", status_code=status.HTTP_201_CREATED, response_model=FAQOut)
async def create_faq(
body: FAQCreate,
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
faq = FAQ(
tenant_id=current_user.tenant_id,
category=body.category,
question=body.question,
answer=body.answer,
keywords=body.keywords,
created_by=current_user.id,
is_active=True,
)
db.add(faq)
await db.flush()
# 벡터 색인
providers = getattr(request.app.state, "providers", {})
if providers.get("embedding") and providers.get("vectordb"):
service = FAQSearchService(providers["embedding"], providers["vectordb"], db)
try:
await service.index_faq(current_user.tenant_id, faq)
except Exception:
pass # 색인 실패해도 FAQ 저장은 성공
await db.commit()
await db.refresh(faq)
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="faq.create",
target_type="faq",
target_id=faq.id,
diff={"question": body.question},
)
return faq
@router.get("", response_model=list[FAQOut])
async def list_faqs(
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(get_current_admin),
):
result = await db.execute(
select(FAQ)
.where(FAQ.tenant_id == current_user.tenant_id)
.order_by(FAQ.created_at.desc())
)
return result.scalars().all()
@router.put("/{faq_id}", response_model=FAQOut)
async def update_faq(
faq_id: str,
body: FAQUpdate,
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
result = await db.execute(
select(FAQ).where(FAQ.id == faq_id, FAQ.tenant_id == current_user.tenant_id)
)
faq = result.scalar_one_or_none()
if not faq:
raise HTTPException(status_code=404, detail="FAQ not found")
diff = {}
if body.question is not None:
diff["question"] = {"before": faq.question, "after": body.question}
faq.question = body.question
if body.answer is not None:
diff["answer"] = "updated"
faq.answer = body.answer
if body.category is not None:
faq.category = body.category
if body.keywords is not None:
faq.keywords = body.keywords
if body.is_active is not None:
faq.is_active = body.is_active
# 재색인
providers = getattr(request.app.state, "providers", {})
if providers.get("embedding") and providers.get("vectordb") and faq.is_active:
service = FAQSearchService(providers["embedding"], providers["vectordb"], db)
try:
await service.index_faq(current_user.tenant_id, faq)
except Exception:
pass
await db.commit()
await db.refresh(faq)
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="faq.update",
target_type="faq",
target_id=faq_id,
diff=diff,
)
return faq
@router.delete("/{faq_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_faq(
faq_id: str,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
result = await db.execute(
select(FAQ).where(FAQ.id == faq_id, FAQ.tenant_id == current_user.tenant_id)
)
faq = result.scalar_one_or_none()
if not faq:
raise HTTPException(status_code=404, detail="FAQ not found")
await db.delete(faq)
await db.commit()
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="faq.delete",
target_type="faq",
target_id=faq_id,
)

View File

@@ -0,0 +1,85 @@
"""
메트릭 조회 API — viewer 이상.
GET /api/admin/metrics — Tier별 응답 통계 (ComplaintLog 기반, Redis 선택적)
"""
from fastapi import APIRouter, Depends, Request
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import get_current_admin
from app.models.admin import AdminUser
from app.models.complaint import ComplaintLog
router = APIRouter(prefix="/api/admin/metrics", tags=["admin-metrics"])
@router.get("")
async def get_metrics(
request: Request,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(get_current_admin),
):
"""
Tier별 응답 통계.
Redis가 있으면 MetricsCollector, 없으면 DB 집계로 fallback.
"""
tenant_id = current_user.tenant_id
# Redis MetricsCollector 우선
redis = getattr(request.app.state, "redis", None)
if redis is not None:
try:
from app.services.metrics import MetricsCollector
collector = MetricsCollector(redis)
overview = await collector.get_overview(tenant_id)
counts = overview.get("counts", {})
total = counts.get("total_count", 0)
return {
"total_count": total,
"tier_counts": {
"A": counts.get("faq_hit_count", 0),
"B": counts.get("rag_hit_count", 0),
"C": counts.get("llm_hit_count", 0),
"D": counts.get("fallback_count", 0),
},
"timeout_count": counts.get("timeout_count", 0),
"avg_ms": overview.get("avg_ms", 0),
"p95_ms": overview.get("p95_ms", 0),
}
except Exception:
pass
# DB fallback: ComplaintLog 집계
total_result = await db.execute(
select(func.count()).where(ComplaintLog.tenant_id == tenant_id)
)
total = total_result.scalar() or 0
tier_result = await db.execute(
select(ComplaintLog.response_tier, func.count())
.where(ComplaintLog.tenant_id == tenant_id)
.group_by(ComplaintLog.response_tier)
)
tier_counts = {row[0]: row[1] for row in tier_result.all() if row[0]}
timeout_result = await db.execute(
select(func.count()).where(
ComplaintLog.tenant_id == tenant_id,
ComplaintLog.is_timeout == True,
)
)
timeout_count = timeout_result.scalar() or 0
return {
"total_count": total,
"tier_counts": {
"A": tier_counts.get("A", 0),
"B": tier_counts.get("B", 0),
"C": tier_counts.get("C", 0),
"D": tier_counts.get("D", 0),
},
"timeout_count": timeout_count,
"avg_ms": 0,
"p95_ms": 0,
}

View File

@@ -0,0 +1,106 @@
"""
악성 유저 관리 API — 편집장(editor) 이상.
GET /api/admin/moderation — 제한 유저 목록
POST /api/admin/moderation/{user_key}/release — 수동 해제
POST /api/admin/moderation/{user_key}/escalate — 수동 레벨 상승
"""
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.deps import require_editor, require_admin
from app.models.admin import AdminUser
from app.models.moderation import UserRestriction
from app.services.moderation import ModerationService
from app.services.audit import log_action
router = APIRouter(prefix="/api/admin/moderation", tags=["admin-moderation"])
class RestrictionOut(BaseModel):
id: str
user_key: str
level: int
reason: str | None = None
auto_applied: bool
expires_at: str | None = None
class Config:
from_attributes = True
class EscalateRequest(BaseModel):
reason: str = "수동 조치"
@router.get("", response_model=list[RestrictionOut])
async def list_restrictions(
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""제한(level ≥ 1) 유저 목록."""
result = await db.execute(
select(UserRestriction).where(
UserRestriction.tenant_id == current_user.tenant_id,
UserRestriction.level > 0,
)
)
restrictions = result.scalars().all()
return [
RestrictionOut(
id=r.id,
user_key=r.user_key,
level=r.level,
reason=r.reason,
auto_applied=r.auto_applied,
expires_at=r.expires_at.isoformat() if r.expires_at else None,
)
for r in restrictions
]
@router.post("/{user_key}/release")
async def release_restriction(
user_key: str,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""수동 해제 (Level 4+ 포함)."""
service = ModerationService(db)
await service.release(current_user.tenant_id, user_key, current_user.id)
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="user.unblock",
target_type="user_restriction",
diff={"user_key": user_key},
)
return {"user_key": user_key, "released": True}
@router.post("/{user_key}/escalate")
async def escalate_restriction(
user_key: str,
body: EscalateRequest,
db: AsyncSession = Depends(get_db),
current_user: AdminUser = Depends(require_editor),
):
"""수동 레벨 상승."""
service = ModerationService(db)
new_level = await service.escalate(current_user.tenant_id, user_key, body.reason)
await log_action(
db=db,
tenant_id=current_user.tenant_id,
actor_id=current_user.id,
actor_type="admin_user",
action="user.restrict",
target_type="user_restriction",
diff={"user_key": user_key, "new_level": new_level, "reason": body.reason},
)
return {"user_key": user_key, "new_level": new_level}

View File

@@ -0,0 +1,112 @@
"""
POST /engine/query — 채널 공통 엔진 API (웹 시뮬레이터)
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.config import settings
from app.services.idempotency import IdempotencyCache
from app.services.routing import ResponseRouter
from app.services.complaint_logger import log_complaint
from app.services.moderation import ModerationService
router = APIRouter()
class EngineRequest(BaseModel):
tenant: str
utterance: str
user_key: str
channel: str = "web"
request_id: Optional[str] = None
class EngineResponse(BaseModel):
answer: str
tier: str
source: str
citations: list[dict] = []
request_id: Optional[str] = None
elapsed_ms: int = 0
is_timeout: bool = False
@router.post("/engine/query", response_model=EngineResponse)
async def engine_query(
body: EngineRequest,
request: Request,
db: AsyncSession = Depends(get_db),
):
tenant_id = body.tenant
request_id = body.request_id or str(uuid.uuid4())
# Idempotency 캐시 확인
redis_client = getattr(request.app.state, "redis", None)
if redis_client:
cache = IdempotencyCache(redis_client)
cached = await cache.get(tenant_id, request_id)
if cached:
return EngineResponse(**cached)
# 악성 감지
mod_service = ModerationService(db)
mod_result = await mod_service.check(tenant_id, body.user_key)
if not mod_result.allowed:
return EngineResponse(
answer=mod_result.message or "이용이 제한되었습니다. 담당 부서에 문의해 주세요.",
tier="D",
source="fallback",
request_id=request_id,
)
# 라우터 실행
providers = getattr(request.app.state, "providers", {})
tenant_config = getattr(request.app.state, "tenant_configs", {}).get(
tenant_id, settings.model_dump()
)
router_svc = ResponseRouter(tenant_config=tenant_config, providers=providers)
result = await router_svc.route(
tenant_id=tenant_id,
utterance=body.utterance,
user_key=body.user_key,
request_id=request_id,
db=db,
)
# 경고 메시지 추가 (Level 1)
if mod_result.message and mod_result.level == 1:
result.answer = f"{mod_result.message}\n\n{result.answer}"
resp_dict = result.to_dict()
# Idempotency 캐시 저장
if redis_client:
await cache.set(tenant_id, request_id, resp_dict)
# 민원 이력 저장 (fire-and-forget: 실패해도 응답 영향 없음)
try:
await log_complaint(
db=db,
tenant_id=tenant_id,
raw_utterance=body.utterance,
raw_user_id=body.user_key,
result=result,
channel=body.channel,
)
except Exception:
pass
return EngineResponse(
answer=result.answer,
tier=result.tier,
source=result.source,
citations=resp_dict.get("citations", []),
request_id=request_id,
elapsed_ms=result.elapsed_ms,
is_timeout=result.is_timeout,
)

View File

@@ -0,0 +1,48 @@
import redis.asyncio as aioredis
from fastapi import APIRouter, Response
from sqlalchemy import text
import app.providers as providers_module
from app.core.config import settings
from app.core.database import AsyncSessionLocal
router = APIRouter()
@router.get("/health")
async def health():
return {"status": "ok", "phase": "0B", "version": "0.2.0"}
@router.get("/ready")
async def ready(response: Response):
checks = {}
all_ok = True
# DB 확인
try:
async with AsyncSessionLocal() as session:
await session.execute(text("SELECT 1"))
checks["db"] = "ok"
except Exception as e:
checks["db"] = f"error: {e}"
all_ok = False
# Redis 확인
try:
r = aioredis.from_url(settings.REDIS_URL, socket_connect_timeout=2)
await r.ping()
await r.aclose()
checks["redis"] = "ok"
except Exception as e:
checks["redis"] = f"error: {e}"
all_ok = False
# Embedding 워밍업 상태
checks["embedding"] = "warmed_up" if providers_module._embedding_warmed_up else "not_warmed_up"
if all_ok:
return {"ready": True, "checks": checks}
else:
response.status_code = 503
return {"ready": False, "checks": checks}

View File

@@ -0,0 +1,116 @@
"""
POST /skill/{tenant_slug} — 카카오 스킬 API
"""
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, Request
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.database import get_db
from app.core.config import settings
from app.services.masking import mask_text, hash_user_key
from app.services.idempotency import IdempotencyCache
from app.services.routing import ResponseRouter
from app.services.complaint_logger import log_complaint
from app.services.moderation import ModerationService
router = APIRouter()
class KakaoUserRequest(BaseModel):
utterance: str
user: Optional[dict] = None
class KakaoSkillRequest(BaseModel):
userRequest: KakaoUserRequest
action: Optional[dict] = None
def build_kakao_response(answer: str, doc_name: Optional[str] = None) -> dict:
quick_replies = []
if doc_name:
quick_replies.append({
"label": "출처 보기",
"action": "message",
"messageText": f"출처: {doc_name}",
})
response = {
"version": "2.0",
"template": {
"outputs": [{"simpleText": {"text": answer}}],
},
}
if quick_replies:
response["template"]["quickReplies"] = quick_replies
return response
@router.post("/skill/{tenant_slug}")
async def kakao_skill(
tenant_slug: str,
body: KakaoSkillRequest,
request: Request,
db: AsyncSession = Depends(get_db),
):
utterance = body.userRequest.utterance
raw_user_id = (body.userRequest.user or {}).get("id", "anonymous")
masked_utterance = mask_text(utterance)
user_key = hash_user_key(raw_user_id)
action_params = (body.action or {}).get("params", {})
request_id = action_params.get("request_id") or str(uuid.uuid4())
# Idempotency 캐시 확인
redis_client = getattr(request.app.state, "redis", None)
if redis_client:
cache = IdempotencyCache(redis_client)
cached = await cache.get(tenant_slug, request_id)
if cached:
return build_kakao_response(cached.get("answer", ""), cached.get("doc_name"))
# 악성 감지
mod_service = ModerationService(db)
mod_result = await mod_service.check(tenant_slug, user_key)
if not mod_result.allowed:
answer = mod_result.message or "이용이 제한되었습니다. 운영자에게 문의해 주세요."
return build_kakao_response(answer)
# 라우터 실행
providers = getattr(request.app.state, "providers", {})
tenant_config = getattr(request.app.state, "tenant_configs", {}).get(
tenant_slug, settings.model_dump()
)
router_svc = ResponseRouter(tenant_config=tenant_config, providers=providers)
result = await router_svc.route(
tenant_id=tenant_slug,
utterance=utterance,
user_key=user_key,
request_id=request_id,
db=db,
)
if mod_result.message and mod_result.level == 1:
result.answer = f"{mod_result.message}\n\n{result.answer}"
# Idempotency 캐시 저장
if redis_client:
await cache.set(tenant_slug, request_id, result.to_dict())
# 민원 이력 저장
try:
await log_complaint(
db=db,
tenant_id=tenant_slug,
raw_utterance=utterance,
raw_user_id=raw_user_id,
result=result,
channel="kakao",
)
except Exception:
pass
return build_kakao_response(result.answer, result.doc_name)

View File

View File

@@ -0,0 +1,83 @@
"""
관리자 계정 초기 생성 스크립트.
사용법: python -m app.scripts.create_admin
"""
import asyncio
import sys
async def main():
print("=== SmartBot KR 관리자 계정 생성 ===\n")
tenant_id = input("테넌트 ID (예: dongducheon): ").strip()
if not tenant_id:
print("오류: 테넌트 ID를 입력하세요.")
sys.exit(1)
email = input("관리자 이메일: ").strip()
if not email:
print("오류: 이메일을 입력하세요.")
sys.exit(1)
import getpass
password = getpass.getpass("비밀번호 (8자 이상): ")
if len(password) < 8:
print("오류: 비밀번호는 8자 이상이어야 합니다.")
sys.exit(1)
from app.core.database import engine, Base
from app.models.tenant import Tenant, TenantConfig
from app.models.admin import AdminUser, AdminRole
from app.core.security import hash_password
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
import uuid
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async with AsyncSession(engine) as db:
# 테넌트 확인 / 생성
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
tenant = Tenant(id=tenant_id, name=tenant_id, slug=tenant_id)
db.add(tenant)
await db.flush()
config = TenantConfig(
id=str(uuid.uuid4()),
tenant_id=tenant_id,
key="tenant_name",
value=tenant_id,
)
db.add(config)
print(f"테넌트 생성: {tenant_id}")
# 관리자 생성
result = await db.execute(
select(AdminUser).where(
AdminUser.tenant_id == tenant_id,
AdminUser.email == email,
)
)
existing = result.scalar_one_or_none()
if existing:
print(f"이미 존재하는 계정입니다: {email}")
else:
admin = AdminUser(
id=str(uuid.uuid4()),
tenant_id=tenant_id,
email=email,
hashed_pw=hash_password(password),
role=AdminRole.admin,
)
db.add(admin)
print(f"관리자 계정 생성: {email} (role: admin)")
await db.commit()
print("\n✅ 완료! 대시보드에서 로그인하세요.")
if __name__ == "__main__":
asyncio.run(main())

View File

View File

@@ -0,0 +1,39 @@
"""
감사 로그 기록 헬퍼.
표준 action: faq.create|faq.update|faq.delete
doc.upload|doc.approve|doc.delete
user.restrict|user.unblock
crawler.approve|crawler.reject
config.update
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.audit import AuditLog
async def log_action(
db: AsyncSession,
tenant_id: str,
actor_id: str,
actor_type: str, # 'admin_user' | 'system_admin'
action: str,
target_type: Optional[str] = None,
target_id: Optional[str] = None,
diff: Optional[dict] = None,
ip_address: Optional[str] = None,
) -> AuditLog:
entry = AuditLog(
tenant_id=tenant_id,
actor_id=actor_id,
actor_type=actor_type,
action=action,
target_type=target_type,
target_id=target_id,
diff=diff,
ip_address=ip_address,
)
db.add(entry)
await db.commit()
return entry

View File

@@ -0,0 +1,44 @@
"""
민원 이력 DB 저장.
개인정보 원칙: utterance_masked(마스킹), user_key(SHA-256 해시 16자리).
원문 미저장.
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.complaint import ComplaintLog
from app.services.masking import mask_text, hash_user_key
from app.services.routing import RoutingResult
async def log_complaint(
db: AsyncSession,
tenant_id: str,
raw_utterance: str,
raw_user_id: str,
result: RoutingResult,
channel: str = "kakao",
) -> ComplaintLog:
"""
민원 이력을 ComplaintLog에 기록.
- utterance: 마스킹 후 저장
- user_key: SHA-256 해시 16자리
- 원문 미저장
"""
entry = ComplaintLog(
tenant_id=tenant_id,
user_key=hash_user_key(raw_user_id),
utterance_masked=mask_text(raw_utterance)[:1000],
channel=channel,
request_id=result.request_id,
response_tier=result.tier,
response_source=result.source,
faq_id=result.faq_id,
doc_id=result.doc_id,
response_ms=result.elapsed_ms,
is_timeout=result.is_timeout,
)
db.add(entry)
await db.commit()
return entry

View File

@@ -0,0 +1,92 @@
"""
웹 크롤러 — httpx + BeautifulSoup4.
robots.txt 준수. CrawlerURL 기반.
"""
from typing import Optional
from datetime import datetime, timezone
import httpx
from bs4 import BeautifulSoup
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import CrawlerURL, Document
CRAWLER_HEADERS = {
"User-Agent": "SmartBot-KR/1.0 (+https://github.com/sinmb79/Gov-chat-bot)",
}
CRAWL_TIMEOUT = 15 # 초
async def check_robots_txt(base_url: str, target_path: str) -> bool:
"""robots.txt 확인. 크롤링 허용 여부 반환."""
try:
from urllib.parse import urlparse
parsed = urlparse(base_url)
robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
async with httpx.AsyncClient(timeout=5) as client:
resp = await client.get(robots_url, headers=CRAWLER_HEADERS)
if resp.status_code != 200:
return True # robots.txt 없으면 허용으로 간주
content = resp.text.lower()
# 간단한 User-agent: * Disallow 체크
lines = content.splitlines()
in_block = False
for line in lines:
line = line.strip()
if line.startswith("user-agent:"):
agent = line.split(":", 1)[1].strip()
in_block = agent in ("*", "smartbot-kr")
elif in_block and line.startswith("disallow:"):
disallowed = line.split(":", 1)[1].strip()
if disallowed and target_path.startswith(disallowed):
return False
return True
except Exception:
return True # 확인 불가 시 허용
async def crawl_url(url: str) -> Optional[str]:
"""URL 크롤링 → 텍스트 추출. 실패 시 None."""
try:
from urllib.parse import urlparse
parsed = urlparse(url)
target_path = parsed.path or "/"
if not await check_robots_txt(url, target_path):
return None # robots.txt 불허
async with httpx.AsyncClient(
timeout=CRAWL_TIMEOUT,
follow_redirects=True,
headers=CRAWLER_HEADERS,
) as client:
resp = await client.get(url)
resp.raise_for_status()
content_type = resp.headers.get("content-type", "")
if "html" in content_type:
soup = BeautifulSoup(resp.content, "html.parser")
for tag in soup(["script", "style", "nav", "footer", "header"]):
tag.decompose()
return soup.get_text(separator="\n", strip=True)
else:
return resp.text
except Exception:
return None
class CrawlerService:
def __init__(self, db: AsyncSession):
self.db = db
async def run(self, crawler_url: CrawlerURL, tenant_id: str) -> Optional[str]:
"""크롤러 URL 실행 → 텍스트 반환."""
text = await crawl_url(crawler_url.url)
# last_crawled 업데이트
crawler_url.last_crawled = datetime.now(timezone.utc)
await self.db.commit()
return text

View File

@@ -0,0 +1,88 @@
"""
문서 처리 파이프라인:
파싱 → 청킹 → 임베딩 → VectorDB 저장 → Document 레코드 업데이트
"""
from typing import Optional
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import Document
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
from app.services.parsers.text_parser import extract_text, chunk_text
class DocumentProcessor:
def __init__(
self,
embedding_provider: EmbeddingProvider,
vectordb_provider: VectorDBProvider,
db: AsyncSession,
):
self.embedding = embedding_provider
self.vectordb = vectordb_provider
self.db = db
async def process(self, tenant_id: str, doc: Document, content: bytes) -> int:
"""
문서를 파싱·청킹·임베딩하여 VectorDB에 저장.
chunk_count 반환. 실패 시 0.
"""
# 1. 텍스트 추출
text = extract_text(content, doc.filename)
if not text or not text.strip():
doc.status = "parse_failed"
await self.db.commit()
return 0
# 2. 청킹
chunks = chunk_text(text)
if not chunks:
doc.status = "parse_failed"
await self.db.commit()
return 0
# 3. 임베딩
try:
embeddings = await self.embedding.embed(chunks)
except NotImplementedError:
doc.status = "embedding_unavailable"
await self.db.commit()
return 0
except Exception:
doc.status = "embedding_failed"
await self.db.commit()
return 0
# 4. 메타데이터 구성
published = doc.published_at.strftime("%Y.%m") if doc.published_at else ""
metadatas = [
{
"doc_id": doc.id,
"filename": doc.filename,
"chunk_idx": i,
"published_at": published,
"tenant_id": tenant_id,
}
for i in range(len(chunks))
]
# 5. VectorDB 저장
await self.vectordb.upsert(
tenant_id=tenant_id,
doc_id=doc.id,
chunks=chunks,
embeddings=embeddings,
metadatas=metadatas,
)
# 6. Document 레코드 업데이트
doc.chunk_count = len(chunks)
doc.status = "processed"
await self.db.commit()
return len(chunks)
async def delete(self, tenant_id: str, doc_id: str) -> None:
"""VectorDB에서 문서 청크 삭제."""
await self.vectordb.delete(tenant_id=tenant_id, doc_id=doc_id)

View File

@@ -0,0 +1,94 @@
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import FAQ
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
from app.providers.base import SearchResult
FAQ_SIMILARITY_THRESHOLD = 0.85 # Tier A 기준
class FAQSearchService:
"""
Tier A — FAQ 임베딩 유사도 검색.
임베딩 유사도 ≥ 0.85 시 등록 FAQ 반환.
"""
def __init__(
self,
embedding_provider: EmbeddingProvider,
vectordb_provider: VectorDBProvider,
db: AsyncSession,
):
self.embedding = embedding_provider
self.vectordb = vectordb_provider
self.db = db
async def search(
self, tenant_id: str, utterance: str
) -> Optional[tuple[FAQ, float]]:
"""
발화를 임베딩 → 벡터DB 검색 → 0.85 이상이면 FAQ 반환.
없으면 None.
"""
try:
vecs = await self.embedding.embed([utterance])
except NotImplementedError:
return None
query_vec = vecs[0]
results = await self.vectordb.search(
tenant_id=tenant_id,
query_vec=query_vec,
top_k=1,
threshold=FAQ_SIMILARITY_THRESHOLD,
)
if not results:
return None
top: SearchResult = results[0]
faq_id = top.metadata.get("faq_id")
if not faq_id:
return None
faq = await self._load_faq(tenant_id, faq_id)
if faq is None:
return None
return faq, top.score
async def _load_faq(self, tenant_id: str, faq_id: str) -> Optional[FAQ]:
result = await self.db.execute(
select(FAQ).where(
FAQ.tenant_id == tenant_id,
FAQ.id == faq_id,
FAQ.is_active.is_(True),
)
)
return result.scalar_one_or_none()
async def increment_hit(self, faq_id: str) -> None:
"""FAQ hit_count 증가."""
faq = await self.db.get(FAQ, faq_id)
if faq:
faq.hit_count = (faq.hit_count or 0) + 1
await self.db.commit()
async def index_faq(self, tenant_id: str, faq: FAQ) -> None:
"""FAQ를 벡터DB에 색인."""
text = f"{faq.question}\n{faq.answer}"
try:
vecs = await self.embedding.embed([text])
except NotImplementedError:
return
await self.vectordb.upsert(
tenant_id=tenant_id,
doc_id=faq.id,
chunks=[text],
embeddings=vecs,
metadatas=[{"faq_id": faq.id, "question": faq.question}],
)

View File

@@ -0,0 +1,28 @@
import json
from typing import Optional
class IdempotencyCache:
def __init__(self, redis_client):
self.redis = redis_client
self.ttl = 60 # 기본 TTL (Phase 0B에서 settings로 교체 예정)
def _key(self, tenant_id: str, request_id: str) -> str:
return f"idempotency:{tenant_id}:{request_id}"
async def get(self, tenant_id: str, request_id: Optional[str]) -> Optional[dict]:
if request_id is None:
return None
raw = await self.redis.get(self._key(tenant_id, request_id))
if raw is None:
return None
return json.loads(raw)
async def set(self, tenant_id: str, request_id: Optional[str], result_dict: dict) -> None:
if request_id is None:
return
await self.redis.setex(
self._key(tenant_id, request_id),
self.ttl,
json.dumps(result_dict),
)

View File

@@ -0,0 +1,34 @@
import hashlib
import re
# 마스킹 패턴 정의
_PATTERNS = [
# 주민등록번호: 6자리-1~4자리+6자리 (숫자 금액과 구분: 앞에 비숫자 또는 시작, 뒤에 비숫자 또는 끝)
(re.compile(r"(?<!\d)\d{6}-[1-4]\d{6}(?!\d)"), "######-*######"),
# 전화번호: 010-1234-5678 형식
(re.compile(r"0\d{1,2}-\d{3,4}-\d{4}"), "***-****-****"),
# 이메일
(re.compile(r"[\w._%+\-]+@[\w.\-]+\.[a-zA-Z]{2,}"), "***@***.***"),
# 카드번호: 4자리씩 4그룹
(re.compile(r"\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}"), "****-****-****-****"),
]
def mask_text(text: str) -> str:
"""텍스트에서 개인정보 패턴을 마스킹하여 반환."""
for pattern, replacement in _PATTERNS:
text = pattern.sub(replacement, text)
return text
def hash_user_key(kakao_id: str) -> str:
"""SHA-256 해시 앞 16자리 반환."""
return hashlib.sha256(kakao_id.encode()).hexdigest()[:16]
def has_sensitive_data(text: str) -> bool:
"""텍스트에 개인정보 패턴이 포함되어 있으면 True."""
for pattern, _ in _PATTERNS:
if pattern.search(text):
return True
return False

View File

@@ -0,0 +1,95 @@
import json
from typing import Optional
from app.services.routing import RoutingResult
METRIC_KEYS = [
"total_count",
"faq_hit_count",
"rag_hit_count",
"llm_hit_count",
"fallback_count",
"timeout_count",
"response_ms_sum",
"blocked_attempts",
]
P95_SORTED_SET = "response_ms_p95_buf"
P95_MAX_SIZE = 10000
_SOURCE_TO_KEY = {
"faq": "faq_hit_count",
"rag": "rag_hit_count",
"llm": "llm_hit_count",
"fallback": "fallback_count",
}
class MetricsCollector:
def __init__(self, redis_client):
self.redis = redis_client
def _prefix(self, tenant_id: str) -> str:
return f"tenant:{tenant_id}:metrics"
def _p95_key(self, tenant_id: str) -> str:
return f"tenant:{tenant_id}:{P95_SORTED_SET}"
async def record(self, tenant_id: str, result: RoutingResult) -> None:
prefix = self._prefix(tenant_id)
p95_key = self._p95_key(tenant_id)
pipe = self.redis.pipeline()
pipe.hincrby(prefix, "total_count", 1)
source_key = _SOURCE_TO_KEY.get(result.source)
if source_key:
pipe.hincrby(prefix, source_key, 1)
if result.is_timeout:
pipe.hincrby(prefix, "timeout_count", 1)
pipe.hincrby(prefix, "response_ms_sum", result.elapsed_ms)
# p95 sorted set — score=elapsed_ms, member=unique id
import time
member = f"{time.time_ns()}"
pipe.zadd(p95_key, {member: result.elapsed_ms})
pipe.zremrangebyrank(p95_key, 0, -(P95_MAX_SIZE + 1))
await pipe.execute()
async def get_overview(self, tenant_id: str) -> dict:
prefix = self._prefix(tenant_id)
p95_key = self._p95_key(tenant_id)
raw = await self.redis.hgetall(prefix)
counts = {k: int(v) for k, v in raw.items()} if raw else {}
total = counts.get("total_count", 0)
avg_ms = counts.get("response_ms_sum", 0) // max(total, 1)
# p95 계산
p95_ms = 0
buf_size = await self.redis.zcard(p95_key)
if buf_size > 0:
p95_idx = max(0, int(buf_size * 0.95) - 1)
p95_items = await self.redis.zrange(p95_key, p95_idx, p95_idx, withscores=True)
if p95_items:
p95_ms = int(p95_items[0][1])
rates = {}
if total > 0:
rates["faq_hit_rate"] = round(counts.get("faq_hit_count", 0) / total * 100, 2)
rates["rag_hit_rate"] = round(counts.get("rag_hit_count", 0) / total * 100, 2)
rates["fallback_rate"] = round(counts.get("fallback_count", 0) / total * 100, 2)
rates["timeout_rate"] = round(counts.get("timeout_count", 0) / total * 100, 2)
else:
rates = {"faq_hit_rate": 0.0, "rag_hit_rate": 0.0, "fallback_rate": 0.0, "timeout_rate": 0.0}
return {
"counts": counts,
"rates": rates,
"avg_ms": avg_ms,
"p95_ms": p95_ms,
}

View File

@@ -0,0 +1,175 @@
"""
악성·반복 민원 제한 서비스.
Level 상태 자동 조치 해제
0 정상 없음 자동
1 주의 경고 메시지 자동
2 경고 30초 응답 지연 자동 24h
3 제한 10회/일 제한 자동 72h
4 임시 차단 24시간 차단 편집장 수동 확인 필요
5 영구 제한 차단 유지 편집장 수동 해제만
원칙: 자동 영구 차단 없음. Level 4+ 는 편집장 수동 확인.
"""
import asyncio
from datetime import datetime, timedelta, timezone
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.moderation import UserRestriction, RestrictionLevel
# 레벨별 자동 만료 시간
LEVEL_EXPIRY = {
RestrictionLevel.WARNING: timedelta(hours=24),
RestrictionLevel.LIMITED: timedelta(hours=72),
RestrictionLevel.SUSPENDED: timedelta(hours=24), # 편집장 확인 전 임시
}
# 레벨 3 일별 제한 횟수
DAILY_LIMIT = 10
# 레벨 1 경고 메시지
WARNING_MESSAGE = "⚠️ 동일 문의가 반복되고 있습니다. 잠시 후 다시 시도해 주세요."
class ModerationResult:
def __init__(
self,
allowed: bool,
level: int = 0,
message: Optional[str] = None,
delay_seconds: int = 0,
):
self.allowed = allowed
self.level = level
self.message = message # 경고 메시지 (Level 1)
self.delay_seconds = delay_seconds # 응답 지연 (Level 2)
class ModerationService:
def __init__(self, db: AsyncSession):
self.db = db
async def check(self, tenant_id: str, user_key: str) -> ModerationResult:
"""
user_key의 제한 레벨 확인.
만료된 제한은 자동 해제.
"""
restriction = await self._get_restriction(tenant_id, user_key)
if restriction is None:
return ModerationResult(allowed=True, level=0)
# 만료 확인
if restriction.expires_at and restriction.expires_at < datetime.now(timezone.utc):
if restriction.level < RestrictionLevel.SUSPENDED:
await self._reset(restriction)
return ModerationResult(allowed=True, level=0)
level = restriction.level
if level == RestrictionLevel.BLOCKED:
return ModerationResult(allowed=False, level=level)
if level == RestrictionLevel.SUSPENDED:
# Level 4: 편집장 확인 전 차단
return ModerationResult(allowed=False, level=level)
if level == RestrictionLevel.LIMITED:
# Level 3: 일별 10회 제한 (Redis 카운터 없이 단순 차단)
return ModerationResult(
allowed=False,
level=level,
message="일일 문의 한도에 도달했습니다. 내일 다시 시도해 주세요.",
)
if level == RestrictionLevel.WARNING:
# Level 2: 30초 지연
return ModerationResult(
allowed=True,
level=level,
delay_seconds=30,
message="요청이 지연되고 있습니다.",
)
if level == RestrictionLevel.NORMAL + 1: # Level 1 (WARNING 사용하기 전 단계는 없으므로 1 = 주의)
return ModerationResult(
allowed=True,
level=level,
message=WARNING_MESSAGE,
)
return ModerationResult(allowed=True, level=level)
async def escalate(
self,
tenant_id: str,
user_key: str,
reason: str = "자동 감지",
) -> int:
"""
레벨 1단계 상승. Level 4+ 는 편집장 수동 확인.
반환: 새 레벨.
"""
restriction = await self._get_restriction(tenant_id, user_key)
if restriction is None:
restriction = UserRestriction(
tenant_id=tenant_id,
user_key=user_key,
level=RestrictionLevel.NORMAL,
auto_applied=True,
)
self.db.add(restriction)
current = restriction.level
if current >= RestrictionLevel.SUSPENDED:
# Level 4+ 는 자동 상승 금지
return current
new_level = min(current + 1, RestrictionLevel.SUSPENDED)
restriction.level = new_level
restriction.reason = reason
restriction.auto_applied = True
# 만료 시간 설정
expiry_delta = LEVEL_EXPIRY.get(new_level)
if expiry_delta:
restriction.expires_at = datetime.now(timezone.utc) + expiry_delta
else:
restriction.expires_at = None
await self.db.commit()
return new_level
async def release(
self,
tenant_id: str,
user_key: str,
applied_by: str,
) -> None:
"""수동 해제 (편집장 이상)."""
restriction = await self._get_restriction(tenant_id, user_key)
if restriction:
await self._reset(restriction, applied_by=applied_by)
async def _get_restriction(
self, tenant_id: str, user_key: str
) -> Optional[UserRestriction]:
result = await self.db.execute(
select(UserRestriction).where(
UserRestriction.tenant_id == tenant_id,
UserRestriction.user_key == user_key,
)
)
return result.scalar_one_or_none()
async def _reset(self, restriction: UserRestriction, applied_by: Optional[str] = None) -> None:
restriction.level = RestrictionLevel.NORMAL
restriction.expires_at = None
restriction.auto_applied = applied_by is None
if applied_by:
restriction.applied_by = applied_by
await self.db.commit()

View File

View File

@@ -0,0 +1,96 @@
"""
문서 파서 — 1차 정식 지원 형식:
TXT · MD · DOCX · 텍스트 PDF · HTML
"""
import io
from typing import Optional
def parse_txt(content: bytes, encoding: str = "utf-8") -> str:
return content.decode(encoding, errors="replace")
def parse_md(content: bytes) -> str:
return content.decode("utf-8", errors="replace")
def parse_html(content: bytes) -> str:
from bs4 import BeautifulSoup
soup = BeautifulSoup(content, "html.parser")
# script/style 제거
for tag in soup(["script", "style"]):
tag.decompose()
return soup.get_text(separator="\n", strip=True)
def parse_docx(content: bytes) -> str:
from docx import Document
doc = Document(io.BytesIO(content))
return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
def parse_pdf(content: bytes) -> str:
try:
import pdfplumber
with pdfplumber.open(io.BytesIO(content)) as pdf:
pages = []
for page in pdf.pages:
text = page.extract_text()
if text:
pages.append(text)
return "\n".join(pages)
except Exception:
return ""
PARSERS = {
"txt": parse_txt,
"md": parse_md,
"html": parse_html,
"htm": parse_html,
"docx": parse_docx,
"pdf": parse_pdf,
}
def extract_text(content: bytes, filename: str) -> Optional[str]:
"""파일 확장자에 따라 적절한 파서 선택."""
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
parser = PARSERS.get(ext)
if not parser:
return None
try:
return parser(content)
except Exception:
return None
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
"""
문단 단위 청킹 (약 chunk_size 토큰).
overlap: 이전 청크 끝 글자를 다음 청크 시작에 포함.
"""
paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
chunks = []
current = []
current_len = 0
for para in paragraphs:
para_len = len(para)
if current_len + para_len > chunk_size and current:
chunk_text_ = "\n".join(current)
chunks.append(chunk_text_)
# overlap: 마지막 문단 유지
if overlap > 0 and current:
current = [current[-1]]
current_len = len(current[-1])
else:
current = []
current_len = 0
current.append(para)
current_len += para_len
if current:
chunks.append("\n".join(current))
return chunks if chunks else [text[:chunk_size]]

View File

@@ -0,0 +1,109 @@
"""
Tier B — RAG 검색.
임베딩 유사도 ≥ 0.70 + 근거 문서 존재 → 문서 기반 템플릿 응답.
"""
from typing import Optional
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.knowledge import Document
from app.providers.base import SearchResult
from app.providers.embedding import EmbeddingProvider
from app.providers.vectordb import VectorDBProvider
RAG_SIMILARITY_THRESHOLD = 0.70 # Tier B 기준
class RAGSearchResult:
def __init__(self, chunk_text: str, doc: Document, score: float):
self.chunk_text = chunk_text
self.doc = doc
self.score = score
@property
def doc_name(self) -> str:
return self.doc.filename
@property
def doc_date(self) -> str:
if self.doc.published_at:
return self.doc.published_at.strftime("%Y.%m")
return ""
class RAGSearchService:
def __init__(
self,
embedding_provider: EmbeddingProvider,
vectordb_provider: VectorDBProvider,
db: AsyncSession,
):
self.embedding = embedding_provider
self.vectordb = vectordb_provider
self.db = db
async def search(
self, tenant_id: str, utterance: str, top_k: int = 3
) -> Optional[list[RAGSearchResult]]:
"""
발화를 임베딩 → 벡터DB 검색 → 0.70 이상 문서 청크 반환.
결과 없으면 None.
"""
try:
vecs = await self.embedding.embed([utterance])
except NotImplementedError:
return None
query_vec = vecs[0]
results = await self.vectordb.search(
tenant_id=tenant_id,
query_vec=query_vec,
top_k=top_k,
threshold=RAG_SIMILARITY_THRESHOLD,
)
if not results:
return None
# 중복 doc_id 제거 (같은 문서의 여러 청크 중 최고 점수만)
seen_docs: dict[str, SearchResult] = {}
for r in results:
doc_id = r.metadata.get("doc_id", r.doc_id.rsplit("_", 1)[0])
if doc_id not in seen_docs or r.score > seen_docs[doc_id].score:
seen_docs[doc_id] = r
# Document 레코드 로드 (is_active=True만)
rag_results = []
for doc_id, sr in seen_docs.items():
doc = await self._load_doc(tenant_id, doc_id)
if doc:
rag_results.append(RAGSearchResult(sr.text, doc, sr.score))
return rag_results if rag_results else None
async def _load_doc(self, tenant_id: str, doc_id: str) -> Optional[Document]:
result = await self.db.execute(
select(Document).where(
Document.tenant_id == tenant_id,
Document.id == doc_id,
Document.is_active.is_(True),
)
)
return result.scalar_one_or_none()
def build_answer(self, utterance: str, rag_results: list[RAGSearchResult]) -> str:
"""
문서 기반 템플릿 응답 생성.
출처 2단계 포맷 (간단형).
"""
# 근거 문단 합치기 (최대 2개)
contexts = [r.chunk_text[:300] for r in rag_results[:2]]
context_str = "\n---\n".join(contexts)
best = rag_results[0]
citation = f"📎 출처: {best.doc_name}"
if best.doc_date:
citation += f" ({best.doc_date})"
return f"{context_str}\n\n{citation}"

View File

@@ -0,0 +1,243 @@
import asyncio
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class RoutingResult:
answer: str
tier: str # 'A'|'B'|'C'|'D'
source: str # 'faq'|'rag'|'llm'|'fallback'
faq_id: Optional[str] = None
doc_id: Optional[str] = None
doc_name: Optional[str] = None
doc_date: Optional[str] = None
score: float = 0.0
elapsed_ms: int = 0
is_timeout: bool = False
request_id: Optional[str] = None
def to_dict(self) -> dict:
citations = []
if self.doc_name:
citations.append({"doc": self.doc_name, "date": self.doc_date or ""})
return {
"answer": self.answer,
"tier": self.tier,
"source": self.source,
"faq_id": self.faq_id,
"doc_id": self.doc_id,
"score": self.score,
"elapsed_ms": self.elapsed_ms,
"is_timeout": self.is_timeout,
"request_id": self.request_id,
"citations": citations,
}
class ResponseRouter:
TIMEOUT_MS = 4500 # 4.5초 — 카카오 5초 한계 - 500ms
def __init__(self, tenant_config: dict, providers: dict):
self.tenant_config = tenant_config
self.providers = providers
async def route(
self,
tenant_id: str,
utterance: str,
user_key: str,
request_id: Optional[str] = None,
db=None,
) -> RoutingResult:
import time
start = time.monotonic()
try:
return await asyncio.wait_for(
self._try_tiers(tenant_id, utterance, user_key, request_id, db, start),
timeout=self.TIMEOUT_MS / 1000,
)
except asyncio.TimeoutError:
elapsed = int((time.monotonic() - start) * 1000)
return self._tier_d(tenant_id, elapsed, is_timeout=True, request_id=request_id)
async def _try_tiers(
self,
tenant_id: str,
utterance: str,
user_key: str,
request_id: Optional[str],
db,
start: float,
) -> RoutingResult:
import time
# Tier A — FAQ 임베딩 유사도 검색
tier_a = await self._try_tier_a(tenant_id, utterance, db)
if tier_a is not None:
tier_a.elapsed_ms = int((time.monotonic() - start) * 1000)
tier_a.request_id = request_id
return tier_a
# Tier C — LLM 기반 재서술 (RAG 근거 있음 + LLM 활성화)
# Tier B보다 먼저 시도: LLM 활성 시 템플릿 대신 재서술
tier_c = await self._try_tier_c(tenant_id, utterance, db)
if tier_c is not None:
tier_c.elapsed_ms = int((time.monotonic() - start) * 1000)
tier_c.request_id = request_id
return tier_c
# Tier B — RAG 문서 검색 (LLM 비활성 또는 Tier C 실패 시)
tier_b = await self._try_tier_b(tenant_id, utterance, db)
if tier_b is not None:
tier_b.elapsed_ms = int((time.monotonic() - start) * 1000)
tier_b.request_id = request_id
return tier_b
elapsed = int((time.monotonic() - start) * 1000)
return self._tier_d(tenant_id, elapsed, request_id=request_id)
async def _try_tier_a(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
"""Tier A — FAQ 임베딩 유사도 ≥ 0.85."""
embedding_provider = self.providers.get("embedding")
vectordb_provider = self.providers.get("vectordb")
if embedding_provider is None or vectordb_provider is None or db is None:
return None
from app.services.faq_search import FAQSearchService
service = FAQSearchService(embedding_provider, vectordb_provider, db)
match = await service.search(tenant_id, utterance)
if match is None:
return None
faq, score = match
# hit_count 비동기 증가 (fire-and-forget)
await service.increment_hit(faq.id)
citation_date = (
faq.updated_at.strftime("%Y.%m") if faq.updated_at else ""
)
return RoutingResult(
answer=faq.answer,
tier="A",
source="faq",
faq_id=faq.id,
doc_name=f"FAQ: {faq.question[:30]}",
doc_date=citation_date,
score=score,
)
async def _try_tier_c(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
"""Tier C — RAG 근거 있음 + LLM 활성화 → 근거 기반 재서술."""
llm_provider = self.providers.get("llm")
embedding_provider = self.providers.get("embedding")
vectordb_provider = self.providers.get("vectordb")
if llm_provider is None or embedding_provider is None or vectordb_provider is None or db is None:
return None
# NullLLMProvider → None 즉시 반환
from app.providers.llm import NullLLMProvider
if isinstance(llm_provider, NullLLMProvider):
return None
# RAG 검색 (Tier B와 동일 임계값)
from app.services.rag_search import RAGSearchService
rag_service = RAGSearchService(embedding_provider, vectordb_provider, db)
rag_results = await rag_service.search(tenant_id, utterance)
if not rag_results:
return None # 근거 없으면 LLM 미호출 (P6 할루시네이션 방지)
# 근거 기반 LLM 재서술
context_chunks = [r.chunk_text for r in rag_results[:3]]
context_str = "\n---\n".join(context_chunks)
tenant_name = self.tenant_config.get("tenant_name", "")
name_prefix = f"{tenant_name}" if tenant_name else ""
system_prompt = (
f"당신은 {name_prefix}AI 안내 도우미입니다.\n"
f"반드시 아래 근거 문서의 내용만을 바탕으로 답변하세요.\n"
f"근거 없는 내용은 절대 추측하지 마세요.\n\n"
f"근거 문서:\n{context_str}"
)
answer = await llm_provider.generate(
system_prompt=system_prompt,
user_message=utterance,
context_chunks=context_chunks,
)
if answer is None:
return None # LLM 실패 → Tier D로 폴백
best = rag_results[0]
return RoutingResult(
answer=answer,
tier="C",
source="llm",
doc_id=best.doc.id,
doc_name=best.doc_name,
doc_date=best.doc_date,
score=best.score,
)
async def _try_tier_b(self, tenant_id: str, utterance: str, db) -> Optional[RoutingResult]:
"""Tier B — RAG 유사도 ≥ 0.70 + 근거 문서 존재."""
embedding_provider = self.providers.get("embedding")
vectordb_provider = self.providers.get("vectordb")
if embedding_provider is None or vectordb_provider is None or db is None:
return None
from app.services.rag_search import RAGSearchService
service = RAGSearchService(embedding_provider, vectordb_provider, db)
results = await service.search(tenant_id, utterance)
if not results:
return None
best = results[0]
answer = service.build_answer(utterance, results)
return RoutingResult(
answer=answer,
tier="B",
source="rag",
doc_id=best.doc.id,
doc_name=best.doc_name,
doc_date=best.doc_date,
score=best.score,
)
def _tier_d(
self,
tenant_id: str,
elapsed_ms: int,
is_timeout: bool = False,
request_id: Optional[str] = None,
) -> RoutingResult:
# DB 조회 없이 tenant_config 메모리에서 직접 읽음 (~5ms)
phone = self.tenant_config.get("phone_number", "")
contact = self.tenant_config.get("fallback_dept", "")
name = self.tenant_config.get("tenant_name", "")
if phone and contact:
answer = f"해당 문의는 {name} {contact}({phone})로 연락해 주세요."
elif phone:
answer = f"해당 문의는 {name}({phone})로 연락해 주세요." if name else f"해당 문의는 {phone}로 연락해 주세요."
elif name:
answer = f"죄송합니다. {name}에 직접 문의해 주세요."
else:
answer = "죄송합니다. 해당 내용을 찾을 수 없습니다. 담당자에게 직접 문의해 주세요."
return RoutingResult(
answer=answer,
tier="D",
source="fallback",
elapsed_ms=elapsed_ms,
is_timeout=is_timeout,
request_id=request_id,
)

2
backend/pytest.ini Normal file
View File

@@ -0,0 +1,2 @@
[pytest]
asyncio_mode = auto

20
backend/requirements.txt Normal file
View File

@@ -0,0 +1,20 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
pydantic==2.8.2
pydantic-settings==2.4.0
sqlalchemy[asyncio]==2.0.35
pytest==8.3.3
pytest-asyncio==0.24.0
httpx==0.27.2
aiosqlite==0.20.0
asyncpg==0.29.0
alembic==1.13.2
redis[asyncio]==5.1.1
bcrypt==4.2.0
pyjwt==2.9.0
python-multipart==0.0.9
sentence-transformers==3.1.1
chromadb==0.5.18
beautifulsoup4==4.12.3
python-docx==1.1.2
pdfplumber==0.11.4

View File

56
backend/tests/conftest.py Normal file
View File

@@ -0,0 +1,56 @@
import pytest
import pytest_asyncio
from httpx import AsyncClient, ASGITransport
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from app.core.database import Base, get_db
from app.main import app
# 모든 모델을 명시적으로 임포트하여 Base.metadata에 등록 (FK 순서 보장)
from app.models import tenant as _m1 # noqa: F401
from app.models import admin as _m2 # noqa: F401
from app.models import knowledge as _m3 # noqa: F401
from app.models import complaint as _m4 # noqa: F401
from app.models import moderation as _m5 # noqa: F401
from app.models import audit as _m6 # noqa: F401
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@pytest_asyncio.fixture(scope="function")
async def db_engine():
engine = create_async_engine(TEST_DATABASE_URL, echo=False)
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
yield engine
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.drop_all)
await engine.dispose()
@pytest_asyncio.fixture(scope="function")
async def db_session(db_engine):
async_session = sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
async with async_session() as session:
yield session
@pytest_asyncio.fixture(scope="function")
async def client(db_engine):
async_session = sessionmaker(db_engine, class_=AsyncSession, expire_on_commit=False)
async def override_get_db():
async with async_session() as session:
yield session
app.dependency_overrides[get_db] = override_get_db
async with AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as ac:
# app 참조를 client에 노출 (테스트에서 app.state 패치 용도)
ac.app = app
yield ac
app.dependency_overrides.clear()

View File

@@ -0,0 +1,114 @@
"""
관리자 인증 API 테스트.
POST /api/admin/auth/login
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from app.models.admin import AdminUser, AdminRole
from app.core.security import hash_password
def make_user(tenant_id: str = "t1", email: str = "admin@test.com") -> AdminUser:
user = AdminUser()
user.id = str(uuid4())
user.tenant_id = tenant_id
user.email = email
user.hashed_pw = hash_password("secret123")
user.role = AdminRole.admin
user.is_active = True
return user
@pytest.mark.asyncio
async def test_login_success(client):
"""올바른 자격증명 → access_token + role 반환."""
user = make_user()
mock_result = MagicMock()
mock_result.scalar_one_or_none = MagicMock(return_value=user)
from app.core.database import get_db
async def override_db():
db = AsyncMock()
db.execute = AsyncMock(return_value=mock_result)
yield db
client.app.dependency_overrides[get_db] = override_db
try:
res = await client.post("/api/admin/auth/login", json={
"tenant_id": user.tenant_id,
"email": user.email,
"password": "secret123",
})
assert res.status_code == 200
data = res.json()
assert "access_token" in data
assert data["token_type"] == "bearer"
assert data["role"] == "admin"
finally:
client.app.dependency_overrides.pop(get_db, None)
@pytest.mark.asyncio
async def test_login_wrong_password(client):
"""틀린 비밀번호 → 401."""
user = make_user()
mock_result = MagicMock()
mock_result.scalar_one_or_none = MagicMock(return_value=user)
from app.core.database import get_db
async def override_db():
db = AsyncMock()
db.execute = AsyncMock(return_value=mock_result)
yield db
client.app.dependency_overrides[get_db] = override_db
try:
res = await client.post("/api/admin/auth/login", json={
"tenant_id": user.tenant_id,
"email": user.email,
"password": "wrong-password",
})
assert res.status_code == 401
finally:
client.app.dependency_overrides.pop(get_db, None)
@pytest.mark.asyncio
async def test_login_user_not_found(client):
"""존재하지 않는 사용자 → 401."""
mock_result = MagicMock()
mock_result.scalar_one_or_none = MagicMock(return_value=None)
from app.core.database import get_db
async def override_db():
db = AsyncMock()
db.execute = AsyncMock(return_value=mock_result)
yield db
client.app.dependency_overrides[get_db] = override_db
try:
res = await client.post("/api/admin/auth/login", json={
"tenant_id": "nonexistent",
"email": "nobody@test.com",
"password": "any",
})
assert res.status_code == 401
finally:
client.app.dependency_overrides.pop(get_db, None)
@pytest.mark.asyncio
async def test_login_missing_fields(client):
"""필수 필드 누락 → 422."""
res = await client.post("/api/admin/auth/login", json={"email": "x@x.com"})
assert res.status_code == 422

View File

@@ -0,0 +1,79 @@
"""
메트릭 조회 API 테스트.
GET /api/admin/metrics
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from app.models.admin import AdminUser, AdminRole
from app.core.security import create_admin_token, hash_password
def make_user(tenant_id: str = "t1") -> AdminUser:
user = AdminUser()
user.id = str(uuid4())
user.tenant_id = tenant_id
user.email = "admin@test.com"
user.hashed_pw = hash_password("pw")
user.role = AdminRole.admin
user.is_active = True
return user
def auth_headers(user: AdminUser) -> dict:
token = create_admin_token(user.id, user.tenant_id, user.role.value)
return {"Authorization": f"Bearer {token}"}
@pytest.mark.asyncio
async def test_metrics_no_auth_returns_401(client):
"""인증 없이 접근 → 401."""
res = await client.get("/api/admin/metrics")
assert res.status_code == 401
@pytest.mark.asyncio
async def test_metrics_db_fallback(client):
"""Redis 없을 때 DB 집계로 메트릭 반환."""
user = make_user()
call_count = 0
async def fake_execute(query):
nonlocal call_count
call_count += 1
mock = MagicMock()
if call_count == 1:
# get_current_admin: AdminUser 조회
mock.scalar_one_or_none = MagicMock(return_value=user)
return mock
sql = str(query)
if "response_tier" in sql or "group_by" in sql.lower():
mock.all = MagicMock(return_value=[("A", 5), ("B", 3), ("D", 2)])
elif "is_timeout" in sql:
mock.scalar = MagicMock(return_value=1)
else:
mock.scalar = MagicMock(return_value=10)
return mock
from app.core.database import get_db
async def override_db():
db = AsyncMock()
db.execute = AsyncMock(side_effect=fake_execute)
yield db
client.app.dependency_overrides[get_db] = override_db
client.app.state.redis = None
try:
res = await client.get("/api/admin/metrics", headers=auth_headers(user))
assert res.status_code == 200
data = res.json()
assert "total_count" in data
assert "tier_counts" in data
assert "timeout_count" in data
assert set(data["tier_counts"].keys()) == {"A", "B", "C", "D"}
finally:
client.app.dependency_overrides.pop(get_db, None)

View File

@@ -0,0 +1,115 @@
"""
감사 로그 단위 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, call
from app.services.audit import log_action
from app.models.audit import AuditLog
@pytest.mark.asyncio
async def test_log_action_creates_audit_entry():
"""log_action 호출 시 AuditLog 추가 및 커밋."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
entry = await log_action(
db=db,
tenant_id="tenant-1",
actor_id="user-1",
actor_type="admin_user",
action="doc.upload",
target_type="document",
target_id="doc-1",
diff={"filename": "guide.txt"},
)
db.add.assert_called_once()
db.commit.assert_called_once()
assert entry.action == "doc.upload"
assert entry.tenant_id == "tenant-1"
assert entry.actor_id == "user-1"
@pytest.mark.asyncio
async def test_log_action_doc_approve():
"""doc.approve 액션 기록."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
entry = await log_action(
db=db,
tenant_id="t1",
actor_id="editor-1",
actor_type="admin_user",
action="doc.approve",
target_type="document",
target_id="doc-2",
)
assert entry.action == "doc.approve"
assert entry.target_id == "doc-2"
@pytest.mark.asyncio
async def test_log_action_includes_diff():
"""diff 필드가 저장됨."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
diff = {"before": "pending", "after": "processed"}
entry = await log_action(
db=db,
tenant_id="t1",
actor_id="user-1",
actor_type="admin_user",
action="config.update",
diff=diff,
)
assert entry.diff == diff
@pytest.mark.asyncio
async def test_log_action_without_target():
"""target_type/target_id 없어도 정상 기록."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
entry = await log_action(
db=db,
tenant_id="t1",
actor_id="user-1",
actor_type="system_admin",
action="config.update",
)
assert entry.target_type is None
assert entry.target_id is None
@pytest.mark.asyncio
async def test_log_crawler_approve():
"""crawler.approve 감사 로그."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
entry = await log_action(
db=db,
tenant_id="t1",
actor_id="editor-1",
actor_type="admin_user",
action="crawler.approve",
target_type="crawler_url",
target_id="url-1",
diff={"url": "https://www.dongducheon.go.kr"},
)
assert entry.action == "crawler.approve"
assert entry.diff["url"] == "https://www.dongducheon.go.kr"

View File

@@ -0,0 +1,65 @@
from datetime import datetime, timedelta, timezone
import jwt
import pytest
from app.core.security import (
hash_password,
verify_password,
create_admin_token,
create_system_token,
decode_token,
)
from app.core.config import settings
def test_hash_and_verify_password():
"""해싱 후 verify True, 다른 비밀번호는 False."""
hashed = hash_password("secret123")
assert verify_password("secret123", hashed) is True
assert verify_password("wrong", hashed) is False
def test_create_admin_token_has_tenant_id():
"""decode 시 payload['tenant_id'] == 'tenant-id', type == 'admin_user'."""
token = create_admin_token("user-1", "tenant-id", "admin")
payload = decode_token(token)
assert payload is not None
assert payload["tenant_id"] == "tenant-id"
assert payload["type"] == "admin_user"
def test_create_system_token_has_no_tenant_id():
"""decode 시 payload['tenant_id'] is None, type == 'system_admin'."""
token = create_system_token("sys-admin-1")
payload = decode_token(token)
assert payload is not None
assert payload["tenant_id"] is None
assert payload["type"] == "system_admin"
def test_decode_invalid_token_returns_none():
"""decode_token('invalid.token') == None."""
assert decode_token("invalid.token") is None
def test_decode_expired_token_returns_none():
"""만료 토큰 생성 후 decode == None."""
payload = {
"sub": "user-1",
"tenant_id": "tenant-1",
"exp": datetime.now(timezone.utc) - timedelta(seconds=1),
}
expired_token = jwt.encode(payload, settings.SECRET_KEY, algorithm="HS256")
assert decode_token(expired_token) is None
def test_admin_and_system_token_different_type():
"""두 토큰의 type 필드 값 다름."""
admin_token = create_admin_token("user-1", "tenant-1", "admin")
system_token = create_system_token("sys-1")
admin_payload = decode_token(admin_token)
system_payload = decode_token(system_token)
assert admin_payload["type"] != system_payload["type"]

View File

@@ -0,0 +1,128 @@
"""
민원 이력 저장 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from app.services.complaint_logger import log_complaint
from app.services.routing import RoutingResult
def make_result(tier="D", source="fallback", request_id="req-1") -> RoutingResult:
return RoutingResult(
answer="답변",
tier=tier,
source=source,
elapsed_ms=100,
is_timeout=False,
request_id=request_id,
)
@pytest.mark.asyncio
async def test_log_complaint_masks_utterance():
"""민원 이력에 원문 미저장, 마스킹 후 저장."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
raw_utterance = "제 전화번호는 010-1234-5678 입니다"
result = make_result()
entry = await log_complaint(
db=db,
tenant_id="t1",
raw_utterance=raw_utterance,
raw_user_id="kakao-user-123",
result=result,
)
# 원문 미저장
assert "010-1234-5678" not in (entry.utterance_masked or "")
# 마스킹 처리됨
assert "***-****-****" in (entry.utterance_masked or "")
@pytest.mark.asyncio
async def test_log_complaint_hashes_user_id():
"""user_key는 해시값(16자리)으로 저장."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
result = make_result()
entry = await log_complaint(
db=db,
tenant_id="t1",
raw_utterance="일반 민원",
raw_user_id="real-kakao-id-12345",
result=result,
)
assert entry.user_key != "real-kakao-id-12345"
assert len(entry.user_key) == 16
@pytest.mark.asyncio
async def test_log_complaint_stores_tier_and_source():
"""response_tier, response_source 저장."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
result = make_result(tier="A", source="faq")
entry = await log_complaint(
db=db,
tenant_id="t1",
raw_utterance="질문",
raw_user_id="user-1",
result=result,
)
assert entry.response_tier == "A"
assert entry.response_source == "faq"
@pytest.mark.asyncio
async def test_log_complaint_stores_request_id():
"""request_id 저장 (Idempotency 추적)."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
result = make_result(request_id="unique-req-abc")
entry = await log_complaint(
db=db,
tenant_id="t1",
raw_utterance="질문",
raw_user_id="user-1",
result=result,
)
assert entry.request_id == "unique-req-abc"
@pytest.mark.asyncio
async def test_log_complaint_timeout_flag():
"""타임아웃 시 is_timeout=True 저장."""
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
result = RoutingResult(
answer="타임아웃",
tier="D",
source="fallback",
elapsed_ms=4500,
is_timeout=True,
)
entry = await log_complaint(
db=db,
tenant_id="t1",
raw_utterance="질문",
raw_user_id="user-1",
result=result,
)
assert entry.is_timeout is True

View File

@@ -0,0 +1,92 @@
"""
크롤러 서비스 단위 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from app.services.crawler import crawl_url, check_robots_txt, CrawlerService
from app.services.parsers.text_parser import extract_text
@pytest.mark.asyncio
async def test_crawl_url_extracts_text():
"""정상 HTML 응답 → 텍스트 추출."""
html_str = "<html><body><p>동두천시 여권 안내</p><script>bad()</script></body></html>"
html = html_str.encode("utf-8")
mock_response = MagicMock()
mock_response.content = html
mock_response.text = html_str
mock_response.headers = {"content-type": "text/html"}
mock_response.raise_for_status = MagicMock()
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=None)
mock_client.get = AsyncMock(return_value=mock_response)
with patch("app.services.crawler.check_robots_txt", return_value=True), \
patch("httpx.AsyncClient", return_value=mock_client):
result = await crawl_url("https://www.example.com/guide")
assert result is not None
assert "동두천시" in result
assert "bad()" not in result
@pytest.mark.asyncio
async def test_crawl_url_returns_none_on_robots_disallow():
"""robots.txt 불허 → None 반환."""
with patch("app.services.crawler.check_robots_txt", return_value=False):
result = await crawl_url("https://www.example.com/private")
assert result is None
@pytest.mark.asyncio
async def test_crawl_url_returns_none_on_network_error():
"""네트워크 오류 → None 반환 (예외 미전파)."""
import httpx
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=None)
mock_client.get = AsyncMock(side_effect=httpx.ConnectError("connection refused"))
with patch("app.services.crawler.check_robots_txt", return_value=True), \
patch("httpx.AsyncClient", return_value=mock_client):
result = await crawl_url("https://unreachable.example.com")
assert result is None
@pytest.mark.asyncio
async def test_crawler_service_updates_last_crawled():
"""run() 후 last_crawled 업데이트."""
db = AsyncMock()
db.commit = AsyncMock()
crawler_url = MagicMock()
crawler_url.url = "https://www.example.com"
crawler_url.last_crawled = None
with patch("app.services.crawler.crawl_url", return_value="크롤링된 내용"):
service = CrawlerService(db)
result = await service.run(crawler_url, "tenant-1")
assert result == "크롤링된 내용"
assert crawler_url.last_crawled is not None
db.commit.assert_called_once()
@pytest.mark.asyncio
async def test_crawler_service_returns_none_on_fail():
"""크롤링 실패 → None 반환."""
db = AsyncMock()
crawler_url = MagicMock()
crawler_url.url = "https://fail.example.com"
with patch("app.services.crawler.crawl_url", return_value=None):
service = CrawlerService(db)
result = await service.run(crawler_url, "tenant-1")
assert result is None

View File

@@ -0,0 +1,123 @@
"""
DocumentProcessor + 텍스트 파서 단위 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from app.services.parsers.text_parser import extract_text, chunk_text
from app.services.document_processor import DocumentProcessor
# ── 파서 테스트 ─────────────────────────────────────────────────────
def test_extract_text_txt():
content = "안녕하세요\n여권 발급 안내입니다".encode("utf-8")
result = extract_text(content, "guide.txt")
assert "여권" in result
def test_extract_text_md():
content = "# 제목\n본문 내용".encode("utf-8")
result = extract_text(content, "readme.md")
assert "본문" in result
def test_extract_text_html():
content = "<html><body><p>여권 안내</p><script>alert(1)</script></body></html>".encode("utf-8")
result = extract_text(content, "page.html")
assert "여권" in result
assert "alert" not in result
def test_extract_text_unsupported_returns_none():
result = extract_text(b"binary", "file.exe")
assert result is None
def test_chunk_text_splits_correctly():
text = "\n".join(["문장 " + str(i) for i in range(50)])
chunks = chunk_text(text, chunk_size=100)
assert len(chunks) > 1
for chunk in chunks:
assert len(chunk) > 0
def test_chunk_text_small_text_single_chunk():
text = "짧은 텍스트"
chunks = chunk_text(text, chunk_size=500)
assert len(chunks) == 1
assert "짧은 텍스트" in chunks[0]
# ── DocumentProcessor 테스트 ────────────────────────────────────────
@pytest.mark.asyncio
async def test_processor_stores_chunks_in_vectordb():
"""파싱 성공 → VectorDB에 upsert 호출."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768, [0.2] * 768])
vectordb = AsyncMock()
vectordb.upsert = AsyncMock(return_value=2)
db = AsyncMock()
db.commit = AsyncMock()
doc = MagicMock()
doc.id = "doc-1"
doc.filename = "test.txt"
doc.published_at = None
doc.status = "pending"
doc.chunk_count = 0
processor = DocumentProcessor(embedding, vectordb, db)
content = "첫 번째 문단입니다.\n두 번째 문단입니다.".encode("utf-8")
count = await processor.process("tenant-1", doc, content)
assert count > 0
assert vectordb.upsert.called
assert doc.status == "processed"
@pytest.mark.asyncio
async def test_processor_fails_on_unsupported_format():
"""지원하지 않는 파일 형식 → chunk_count=0, status=parse_failed."""
embedding = AsyncMock()
vectordb = AsyncMock()
db = AsyncMock()
db.commit = AsyncMock()
doc = MagicMock()
doc.id = "doc-2"
doc.filename = "file.exe"
doc.published_at = None
doc.status = "pending"
processor = DocumentProcessor(embedding, vectordb, db)
count = await processor.process("tenant-1", doc, b"binary data")
assert count == 0
assert doc.status == "parse_failed"
@pytest.mark.asyncio
async def test_processor_handles_embedding_not_implemented():
"""임베딩 미구성 → status=embedding_unavailable."""
embedding = AsyncMock()
embedding.embed = AsyncMock(side_effect=NotImplementedError)
vectordb = AsyncMock()
db = AsyncMock()
db.commit = AsyncMock()
doc = MagicMock()
doc.id = "doc-3"
doc.filename = "guide.txt"
doc.published_at = None
doc.status = "pending"
processor = DocumentProcessor(embedding, vectordb, db)
count = await processor.process("tenant-1", doc, "본문 내용".encode("utf-8"))
assert count == 0
assert doc.status == "embedding_unavailable"

View File

@@ -0,0 +1,109 @@
"""
POST /engine/query 엔드포인트 테스트 (웹 시뮬레이터).
"""
import pytest
from unittest.mock import AsyncMock, patch
@pytest.mark.asyncio
async def test_engine_query_returns_correct_structure(client):
"""POST /engine/query → 필수 필드 포함 응답."""
response = await client.post(
"/engine/query",
json={
"tenant": "test-tenant",
"utterance": "여권 발급 방법 알려주세요",
"user_key": "test-user-key",
},
)
assert response.status_code == 200
data = response.json()
assert "answer" in data
assert "tier" in data
assert "source" in data
assert "citations" in data
assert "request_id" in data
@pytest.mark.asyncio
async def test_engine_query_tier_d_when_no_faq(client):
"""FAQ 없으면 Tier D 폴백 반환."""
response = await client.post(
"/engine/query",
json={
"tenant": "test-tenant",
"utterance": "알 수 없는 질문",
"user_key": "user-1",
},
)
assert response.status_code == 200
data = response.json()
assert data["tier"] == "D"
assert data["source"] == "fallback"
@pytest.mark.asyncio
async def test_engine_query_idempotency_deduplication(client):
"""같은 request_id로 두 번 요청 → 동일 응답 (캐시 HIT)."""
payload = {
"tenant": "test-tenant",
"utterance": "중복 요청 테스트",
"user_key": "user-1",
"request_id": "dup-req-001",
}
# Mock Redis for idempotency
mock_redis = AsyncMock()
cached_result = {
"answer": "캐시된 응답",
"tier": "D",
"source": "fallback",
"citations": [],
"request_id": "dup-req-001",
"elapsed_ms": 10,
"is_timeout": False,
}
import json
mock_redis.get = AsyncMock(return_value=json.dumps(cached_result).encode())
mock_redis.setex = AsyncMock()
# app.state에 직접 설정 (lifespan 미실행 상태)
client.app.state.redis = mock_redis
try:
resp1 = await client.post("/engine/query", json=payload)
finally:
client.app.state.redis = None
assert resp1.status_code == 200
assert resp1.json()["answer"] == "캐시된 응답"
@pytest.mark.asyncio
async def test_engine_query_request_id_auto_generated(client):
"""request_id 미전송 시 자동 생성."""
response = await client.post(
"/engine/query",
json={
"tenant": "test-tenant",
"utterance": "테스트",
"user_key": "user-1",
},
)
assert response.status_code == 200
data = response.json()
assert data["request_id"] is not None
assert len(data["request_id"]) > 0
@pytest.mark.asyncio
async def test_engine_query_channel_default_web(client):
"""channel 미전송 시 기본값 web 사용 — 응답은 정상."""
response = await client.post(
"/engine/query",
json={
"tenant": "test-tenant",
"utterance": "테스트",
"user_key": "user-1",
},
)
assert response.status_code == 200

View File

@@ -0,0 +1,142 @@
"""
FAQSearchService 단위 테스트.
임베딩/벡터DB는 Mock 사용.
"""
import pytest
import pytest_asyncio
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from app.services.faq_search import FAQSearchService, FAQ_SIMILARITY_THRESHOLD
from app.providers.base import SearchResult
def make_faq(tenant_id: str, question: str = "Q", answer: str = "A") -> MagicMock:
faq = MagicMock()
faq.id = str(uuid4())
faq.tenant_id = tenant_id
faq.question = question
faq.answer = answer
faq.is_active = True
faq.hit_count = 0
faq.updated_at = None
return faq
@pytest.mark.asyncio
async def test_faq_search_above_threshold_returns_faq():
"""유사도 ≥ 0.85 → FAQ 반환."""
faq = make_faq("t1")
faq_id = faq.id
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="Q\nA", doc_id=f"{faq_id}_0", score=0.92, metadata={"faq_id": faq_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=faq)
db.execute.return_value = scalar
service = FAQSearchService(embedding, vectordb, db)
result = await service.search("t1", "여권 발급")
assert result is not None
found_faq, score = result
assert found_faq.id == faq_id
assert score >= FAQ_SIMILARITY_THRESHOLD
@pytest.mark.asyncio
async def test_faq_search_below_threshold_returns_none():
"""유사도 < 0.85 → None 반환."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[]) # threshold 미달로 빈 결과
db = AsyncMock()
service = FAQSearchService(embedding, vectordb, db)
result = await service.search("t1", "모르는 질문")
assert result is None
@pytest.mark.asyncio
async def test_faq_search_tenant_isolation():
"""다른 테넌트의 FAQ가 반환되지 않는다."""
faq_tenant_b = make_faq("tenant-B")
faq_id = faq_tenant_b.id
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="Q\nA", doc_id=f"{faq_id}_0", score=0.95, metadata={"faq_id": faq_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
# tenant-A로 조회하면 None (다른 테넌트 FAQ)
scalar.scalar_one_or_none = MagicMock(return_value=None)
db.execute.return_value = scalar
service = FAQSearchService(embedding, vectordb, db)
result = await service.search("tenant-A", "테스트 질문") # tenant-A로 검색
assert result is None
@pytest.mark.asyncio
async def test_faq_search_hit_count_increment():
"""FAQ 검색 성공 시 hit_count가 증가한다."""
faq = make_faq("t1")
faq.hit_count = 5
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="Q\nA", doc_id=f"{faq.id}_0", score=0.90,
metadata={"faq_id": faq.id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=faq)
db.execute.return_value = scalar
db.get = AsyncMock(return_value=faq)
db.commit = AsyncMock()
service = FAQSearchService(embedding, vectordb, db)
result = await service.search("t1", "질문")
assert result is not None
await service.increment_hit(faq.id)
assert faq.hit_count == 6
@pytest.mark.asyncio
async def test_faq_search_embedding_not_implemented_returns_none():
"""EmbeddingProvider가 NotImplementedError → None 반환 (graceful)."""
embedding = AsyncMock()
embedding.embed = AsyncMock(side_effect=NotImplementedError("not configured"))
vectordb = AsyncMock()
db = AsyncMock()
service = FAQSearchService(embedding, vectordb, db)
result = await service.search("t1", "질문")
assert result is None

View File

@@ -0,0 +1,49 @@
import pytest
from app.services.idempotency import IdempotencyCache
class FakeRedis:
def __init__(self):
self._store = {}
async def get(self, key):
return self._store.get(key)
async def setex(self, key, ttl, value):
self._store[key] = value
@pytest.mark.asyncio
async def test_cache_miss_returns_none():
"""get('tenant-1', 'req-1') == None (캐시 없음)."""
cache = IdempotencyCache(FakeRedis())
result = await cache.get("tenant-1", "req-1")
assert result is None
@pytest.mark.asyncio
async def test_set_and_get_returns_same_data():
"""set 후 get → 동일 데이터."""
cache = IdempotencyCache(FakeRedis())
data = {"answer": "test", "tier": "A"}
await cache.set("tenant-1", "req-1", data)
result = await cache.get("tenant-1", "req-1")
assert result == data
@pytest.mark.asyncio
async def test_different_tenant_same_request_id_no_collision():
"""tenant-A에 저장, tenant-B에서 같은 request_id get → None."""
cache = IdempotencyCache(FakeRedis())
await cache.set("tenant-A", "req-1", {"answer": "A"})
result = await cache.get("tenant-B", "req-1")
assert result is None
@pytest.mark.asyncio
async def test_no_request_id_returns_none():
"""get(tenant_id, None) == None."""
cache = IdempotencyCache(FakeRedis())
result = await cache.get("tenant-1", None)
assert result is None

View File

@@ -0,0 +1,60 @@
"""
LLM Provider 인터페이스 테스트.
"""
import pytest
from app.providers.llm import NullLLMProvider
from app.providers.llm_anthropic import AnthropicLLMProvider, OpenAILLMProvider
from app.providers import get_llm_provider
@pytest.mark.asyncio
async def test_null_provider_always_returns_none():
provider = NullLLMProvider()
result = await provider.generate("sys", "user", ["ctx"])
assert result is None
@pytest.mark.asyncio
async def test_null_provider_returns_none_without_context():
provider = NullLLMProvider()
result = await provider.generate("sys", "user", [])
assert result is None
def test_get_llm_provider_none():
provider = get_llm_provider({"LLM_PROVIDER": "none"})
assert isinstance(provider, NullLLMProvider)
def test_get_llm_provider_anthropic():
provider = get_llm_provider({"LLM_PROVIDER": "anthropic", "ANTHROPIC_API_KEY": "test"})
assert isinstance(provider, AnthropicLLMProvider)
def test_get_llm_provider_unknown_raises():
with pytest.raises(ValueError):
get_llm_provider({"LLM_PROVIDER": "unknown_xyz"})
@pytest.mark.asyncio
async def test_anthropic_provider_no_context_returns_none():
"""근거 없으면 API 호출 없이 None."""
provider = AnthropicLLMProvider(api_key="test-key")
result = await provider.generate("sys", "user", context_chunks=[])
assert result is None
@pytest.mark.asyncio
async def test_anthropic_provider_api_failure_returns_none():
"""API 오류 → None (예외 미전파)."""
from unittest.mock import patch, AsyncMock
provider = AnthropicLLMProvider(api_key="invalid-key")
with patch("anthropic.AsyncAnthropic") as mock_cls:
mock_client = AsyncMock()
mock_client.messages.create = AsyncMock(side_effect=Exception("API error"))
mock_cls.return_value = mock_client
result = await provider.generate("sys", "user", context_chunks=["근거"])
assert result is None

View File

@@ -0,0 +1,60 @@
import pytest
from app.services.masking import mask_text, hash_user_key, has_sensitive_data
def test_rrn_full_masking():
"""주민번호 마스킹."""
result = mask_text("주민번호: 901225-1234567")
assert result == "주민번호: ######-*######"
def test_phone_masking():
"""전화번호 마스킹."""
result = mask_text("전화: 010-1234-5678")
assert result == "전화: ***-****-****"
def test_email_masking():
"""이메일 마스킹."""
result = mask_text("user@example.com")
assert result == "***@***.***"
def test_card_masking():
"""카드번호 마스킹."""
result = mask_text("4242-4242-4242-4242")
assert result == "****-****-****-****"
def test_no_false_positive_number():
"""금액 숫자는 마스킹되지 않아야 함."""
original = "예산액: 1,234,567원"
result = mask_text(original)
assert result == original
def test_multiple_patterns_in_one_text():
"""전화번호와 이메일이 같이 있는 문장에서 둘 다 마스킹."""
text = "연락처: 010-1234-5678, 이메일: admin@gov.kr"
result = mask_text(text)
assert "010-1234-5678" not in result
assert "admin@gov.kr" not in result
assert "***-****-****" in result
assert "***@***.***" in result
def test_hash_user_key_length():
"""hash_user_key 결과 길이 == 16."""
assert len(hash_user_key("any_id")) == 16
def test_hash_user_key_deterministic():
"""같은 입력에 항상 같은 결과."""
assert hash_user_key("test_user") == hash_user_key("test_user")
def test_has_sensitive_data_detection():
"""민감 데이터 감지."""
assert has_sensitive_data("010-1234-5678") is True
assert has_sensitive_data("일반 민원 내용입니다") is False

View File

@@ -0,0 +1,114 @@
import pytest
import pytest_asyncio
from unittest.mock import AsyncMock, MagicMock, patch
from app.services.metrics import MetricsCollector
from app.services.routing import RoutingResult
def make_result(source="faq", elapsed_ms=100, is_timeout=False):
return RoutingResult(
answer="test",
tier="A",
source=source,
elapsed_ms=elapsed_ms,
is_timeout=is_timeout,
)
class FakePipeline:
def __init__(self):
self._data = {}
self._sets = {}
self._calls = []
def hincrby(self, key, field, amount):
if key not in self._data:
self._data[key] = {}
self._data[key][field] = self._data[key].get(field, 0) + amount
return self
def zadd(self, key, mapping):
if key not in self._sets:
self._sets[key] = {}
self._sets[key].update(mapping)
return self
def zremrangebyrank(self, key, start, stop):
return self
async def execute(self):
return []
class FakeRedis:
def __init__(self):
self._data = {}
self._sets = {}
self._pipeline = FakePipeline()
def pipeline(self):
return self._pipeline
async def hgetall(self, key):
return {k: str(v) for k, v in self._pipeline._data.get(key, {}).items()}
async def zcard(self, key):
return len(self._pipeline._sets.get(key, {}))
async def zrange(self, key, start, stop, withscores=False):
items = sorted(self._pipeline._sets.get(key, {}).items(), key=lambda x: x[1])
slice_ = items[start:stop + 1 if stop >= 0 else None]
if withscores:
return [(k.encode(), v) for k, v in slice_]
return [k.encode() for k, _ in slice_]
@pytest.mark.asyncio
async def test_record_increments_total_count():
"""record 후 total_count == 1."""
redis = FakeRedis()
collector = MetricsCollector(redis)
result = make_result(source="faq")
await collector.record("tenant-1", result)
assert redis._pipeline._data.get("tenant:tenant-1:metrics", {}).get("total_count", 0) == 1
@pytest.mark.asyncio
async def test_record_faq_increments_faq_hit_count():
"""source='faq'인 result record 후 faq_hit_count == 1."""
redis = FakeRedis()
collector = MetricsCollector(redis)
await collector.record("tenant-1", make_result(source="faq"))
counts = redis._pipeline._data.get("tenant:tenant-1:metrics", {})
assert counts.get("faq_hit_count", 0) == 1
@pytest.mark.asyncio
async def test_record_timeout_increments_timeout_count():
"""is_timeout=True인 result record 후 timeout_count == 1."""
redis = FakeRedis()
collector = MetricsCollector(redis)
await collector.record("tenant-1", make_result(is_timeout=True))
counts = redis._pipeline._data.get("tenant:tenant-1:metrics", {})
assert counts.get("timeout_count", 0) == 1
@pytest.mark.asyncio
async def test_get_overview_returns_rates():
"""total=10, faq=4 → faq_hit_rate == 40.0."""
redis = FakeRedis()
collector = MetricsCollector(redis)
# 수동으로 pipeline 데이터 설정
redis._pipeline._data["tenant:tenant-1:metrics"] = {
"total_count": 10,
"faq_hit_count": 4,
"rag_hit_count": 2,
"fallback_count": 3,
"timeout_count": 1,
"response_ms_sum": 1000,
}
overview = await collector.get_overview("tenant-1")
assert overview["rates"]["faq_hit_rate"] == 40.0

View File

@@ -0,0 +1,68 @@
import pytest
import pytest_asyncio
from httpx import AsyncClient, ASGITransport
from sqlalchemy import select
from app.core.middleware import tenanted_query, system_query, TenantMiddleware
from app.models.knowledge import FAQ
from app.main import app
@pytest.mark.asyncio
async def test_health_exempt_no_tenant_required(client):
"""GET /health → 200."""
response = await client.get("/health")
assert response.status_code == 200
@pytest.mark.asyncio
async def test_token_endpoint_exempt(client):
"""POST /api/admin/token → 400이 아닌 응답 (tenant 오류 아님)."""
response = await client.post("/api/admin/token")
# 엔드포인트가 없으면 404, 있으면 422/200 — 어떤 경우든 400(tenant_required)이 아님
assert response.status_code != 400 or response.json().get("error") != "tenant_required"
@pytest.mark.asyncio
async def test_api_request_without_auth_returns_401(client):
"""GET /api/admin/faqs (인증 없음) → 401 (admin 경로는 미들웨어 exempt, JWT 필요)."""
response = await client.get("/api/admin/faqs")
assert response.status_code == 401
def test_tenanted_query_without_tenant_id_raises_runtime_error():
"""tenanted_query(select(FAQ), FAQ, None) → RuntimeError 발생."""
with pytest.raises(RuntimeError):
tenanted_query(select(FAQ), FAQ, None)
def test_tenanted_query_with_tenant_id_adds_filter():
"""생성된 SQL에 'tenant_id' 포함 확인."""
query = tenanted_query(select(FAQ), FAQ, "test-tenant-id")
sql = str(query.compile())
assert "tenant_id" in sql
def test_tenanted_query_with_empty_string_raises():
"""tenanted_query(select(FAQ), FAQ, '') → RuntimeError 발생."""
with pytest.raises(RuntimeError):
tenanted_query(select(FAQ), FAQ, "")
def test_system_query_has_no_tenant_filter():
"""system_query(q) == q (변경 없음)."""
q = select(FAQ)
result = system_query(q)
assert result is q
@pytest.mark.asyncio
async def test_request_state_has_tenant_id_after_middleware(client):
"""X-Tenant-Slug 헤더 포함 요청 시 request.state.tenant_id 주입 확인."""
# 미들웨어가 tenant_id를 설정하면 400 대신 다른 응답 코드가 나와야 함
response = await client.get(
"/api/admin/faqs",
headers={"X-Tenant-Slug": "test-tenant"}
)
# tenant_required 오류가 아니어야 함 (404 또는 다른 응답)
assert response.status_code != 400 or response.json().get("error") != "tenant_required"

View File

@@ -0,0 +1,95 @@
import pytest
from sqlalchemy import inspect, String, VARCHAR
from app.models.tenant import Tenant, TenantConfig
from app.models.admin import SystemAdmin, AdminUser, AdminRole
from app.models.knowledge import FAQ, Document, CrawlerURL
from app.models.complaint import ComplaintLog
from app.models.moderation import UserRestriction, RestrictionLevel
from app.models.audit import AuditLog
TENANT_REQUIRED_MODELS = [
TenantConfig, FAQ, Document, CrawlerURL, ComplaintLog, UserRestriction, AuditLog, AdminUser
]
def get_column(model, col_name):
"""모델에서 컬럼 객체 반환."""
for col in model.__table__.columns:
if col.name == col_name:
return col
return None
def test_all_tenant_models_have_tenant_id():
"""8개 모델 모두 tenant_id 컬럼 포함 확인."""
for model in TENANT_REQUIRED_MODELS:
col = get_column(model, "tenant_id")
assert col is not None, f"{model.__tablename__} must have tenant_id column"
def test_system_admin_has_no_tenant_id():
"""SystemAdmin 테이블에 tenant_id 없음 확인."""
col = get_column(SystemAdmin, "tenant_id")
assert col is None, "SystemAdmin must NOT have tenant_id column"
def test_all_pk_are_uuid_string():
"""모든 모델 PK 컬럼 타입이 VARCHAR(36) 또는 String."""
all_models = [Tenant, TenantConfig, SystemAdmin, AdminUser, FAQ, Document,
CrawlerURL, ComplaintLog, UserRestriction, AuditLog]
for model in all_models:
pk_cols = [c for c in model.__table__.columns if c.primary_key]
assert len(pk_cols) > 0, f"{model.__tablename__} has no PK"
for col in pk_cols:
col_type = str(col.type)
assert "VARCHAR" in col_type or "CHAR" in col_type or isinstance(col.type, String), \
f"{model.__tablename__}.{col.name} PK must be String/VARCHAR, got {col_type}"
def test_admin_user_email_not_globally_unique():
"""AdminUser.email 컬럼의 unique=False 확인."""
col = get_column(AdminUser, "email")
assert col is not None
assert not col.unique, "AdminUser.email must NOT be globally unique (use tenant-scoped constraint)"
def test_system_admin_email_globally_unique():
"""SystemAdmin.email 컬럼의 unique=True 확인."""
col = get_column(SystemAdmin, "email")
assert col is not None
assert col.unique, "SystemAdmin.email must be globally unique"
def test_document_is_active_default_false():
"""Document.is_active 기본값이 False 확인."""
col = get_column(Document, "is_active")
assert col is not None
assert col.default.arg is False, "Document.is_active default must be False"
def test_complaint_log_has_request_id():
"""ComplaintLog에 request_id 컬럼 존재 확인."""
col = get_column(ComplaintLog, "request_id")
assert col is not None, "ComplaintLog must have request_id column"
def test_audit_log_has_required_columns():
"""AuditLog에 actor_id, actor_type, action, tenant_id 확인."""
required = ["actor_id", "actor_type", "action", "tenant_id"]
for col_name in required:
col = get_column(AuditLog, col_name)
assert col is not None, f"AuditLog must have {col_name} column"
def test_user_restriction_level_range():
"""RestrictionLevel.NORMAL==0, BLOCKED==5."""
assert RestrictionLevel.NORMAL == 0
assert RestrictionLevel.BLOCKED == 5
def test_admin_role_values():
"""AdminRole 값 집합 == {'admin','editor','viewer','readonly_api'}."""
expected = {"admin", "editor", "viewer", "readonly_api"}
actual = {role.value for role in AdminRole}
assert actual == expected

View File

@@ -0,0 +1,141 @@
"""
악성·반복 민원 제한 서비스 테스트.
"""
import pytest
from datetime import datetime, timedelta, timezone
from unittest.mock import AsyncMock, MagicMock
from app.services.moderation import ModerationService, ModerationResult
from app.models.moderation import UserRestriction, RestrictionLevel
def make_db(restriction=None):
db = AsyncMock()
db.add = MagicMock()
db.commit = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=restriction)
db.execute.return_value = scalar
return db
@pytest.mark.asyncio
async def test_check_normal_user_is_allowed():
"""제한 없는 사용자 → 허용."""
db = make_db(restriction=None)
service = ModerationService(db)
result = await service.check("t1", "user-1")
assert result.allowed is True
assert result.level == 0
@pytest.mark.asyncio
async def test_check_blocked_user_is_denied():
"""Level 5 (BLOCKED) → 차단."""
r = MagicMock()
r.level = RestrictionLevel.BLOCKED
r.expires_at = None
db = make_db(restriction=r)
service = ModerationService(db)
result = await service.check("t1", "user-block")
assert result.allowed is False
assert result.level == RestrictionLevel.BLOCKED
@pytest.mark.asyncio
async def test_check_suspended_user_is_denied():
"""Level 4 (SUSPENDED) → 편집장 확인 전 차단."""
r = MagicMock()
r.level = RestrictionLevel.SUSPENDED
r.expires_at = datetime.now(timezone.utc) + timedelta(hours=1)
db = make_db(restriction=r)
service = ModerationService(db)
result = await service.check("t1", "user-sus")
assert result.allowed is False
@pytest.mark.asyncio
async def test_check_warning_user_has_delay():
"""Level 2 (WARNING) → 허용 + 30초 지연."""
r = MagicMock()
r.level = RestrictionLevel.WARNING
r.expires_at = datetime.now(timezone.utc) + timedelta(hours=1)
db = make_db(restriction=r)
service = ModerationService(db)
result = await service.check("t1", "user-warn")
assert result.allowed is True
assert result.delay_seconds == 30
@pytest.mark.asyncio
async def test_escalate_increases_level():
"""레벨 상승: NORMAL → 다음 레벨."""
r = MagicMock(spec=UserRestriction)
r.level = RestrictionLevel.NORMAL
r.expires_at = None
r.auto_applied = True
db = make_db(restriction=r)
service = ModerationService(db)
new_level = await service.escalate("t1", "user-1", "반복 민원")
assert new_level == RestrictionLevel.NORMAL + 1
db.commit.assert_called_once()
@pytest.mark.asyncio
async def test_escalate_stops_at_suspended():
"""Level 4 이상 자동 상승 금지."""
r = MagicMock(spec=UserRestriction)
r.level = RestrictionLevel.SUSPENDED
r.expires_at = None
db = make_db(restriction=r)
service = ModerationService(db)
new_level = await service.escalate("t1", "user-1")
# Level 4에서 멈춤
assert new_level == RestrictionLevel.SUSPENDED
@pytest.mark.asyncio
async def test_release_resets_level():
"""수동 해제 → level 0."""
r = MagicMock(spec=UserRestriction)
r.level = RestrictionLevel.SUSPENDED
r.expires_at = None
r.auto_applied = True
r.applied_by = None
db = make_db(restriction=r)
service = ModerationService(db)
await service.release("t1", "user-1", applied_by="editor-1")
assert r.level == RestrictionLevel.NORMAL
assert r.applied_by == "editor-1"
db.commit.assert_called_once()
@pytest.mark.asyncio
async def test_check_expired_restriction_resets():
"""만료된 제한 → 자동 해제."""
r = MagicMock(spec=UserRestriction)
r.level = RestrictionLevel.WARNING # Level 2
r.expires_at = datetime.now(timezone.utc) - timedelta(hours=1) # 만료
r.auto_applied = True
r.applied_by = None
db = make_db(restriction=r)
service = ModerationService(db)
result = await service.check("t1", "user-expired")
assert result.allowed is True
assert result.level == 0

View File

@@ -0,0 +1,40 @@
import pytest
from app.providers.llm import NullLLMProvider
from app.providers.embedding import NotImplementedEmbeddingProvider
from app.providers import get_llm_provider, get_embedding_provider
@pytest.mark.asyncio
async def test_null_llm_provider_returns_none():
"""NullLLMProvider().generate(...) == None."""
provider = NullLLMProvider()
result = await provider.generate("system", "user", [])
assert result is None
def test_get_llm_provider_none_config():
"""get_llm_provider({'LLM_PROVIDER':'none'}) == NullLLMProvider 인스턴스."""
provider = get_llm_provider({"LLM_PROVIDER": "none"})
assert isinstance(provider, NullLLMProvider)
def test_get_llm_provider_unknown_raises():
"""get_llm_provider({'LLM_PROVIDER':'xyz'}) → ValueError."""
with pytest.raises(ValueError):
get_llm_provider({"LLM_PROVIDER": "xyz"})
@pytest.mark.asyncio
async def test_not_implemented_embedding_raises_on_embed():
"""NotImplementedEmbeddingProvider().embed(['test']) → NotImplementedError."""
provider = NotImplementedEmbeddingProvider()
with pytest.raises(NotImplementedError):
await provider.embed(["test"])
@pytest.mark.asyncio
async def test_not_implemented_embedding_warmup_is_noop():
""".warmup() 호출 시 예외 없이 통과."""
provider = NotImplementedEmbeddingProvider()
await provider.warmup() # 예외 없어야 함

View File

@@ -0,0 +1,150 @@
"""
RAGSearchService 단위 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from datetime import datetime
from app.services.rag_search import RAGSearchService, RAG_SIMILARITY_THRESHOLD
from app.providers.base import SearchResult
def make_doc(tenant_id: str, filename: str = "안내문.txt") -> MagicMock:
doc = MagicMock()
doc.id = str(uuid4())
doc.tenant_id = tenant_id
doc.filename = filename
doc.is_active = True
doc.published_at = datetime(2026, 3, 1)
return doc
@pytest.mark.asyncio
async def test_rag_search_returns_results_above_threshold():
"""유사도 ≥ 0.70 → RAGSearchResult 반환."""
doc = make_doc("t1")
doc_id = doc.id
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(
text="여권은 민원과에서 발급합니다.",
doc_id=f"{doc_id}_0",
score=0.78,
metadata={"doc_id": doc_id, "filename": "여권안내.txt"},
)
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
service = RAGSearchService(embedding, vectordb, db)
results = await service.search("t1", "여권 발급")
assert results is not None
assert len(results) > 0
assert results[0].score >= RAG_SIMILARITY_THRESHOLD
@pytest.mark.asyncio
async def test_rag_search_returns_none_when_no_match():
"""매칭 없으면 None."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[])
db = AsyncMock()
service = RAGSearchService(embedding, vectordb, db)
results = await service.search("t1", "알 수 없는 질문")
assert results is None
@pytest.mark.asyncio
async def test_rag_search_excludes_inactive_docs():
"""is_active=False 문서 → 결과에서 제외."""
doc_id = str(uuid4())
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="내용", doc_id=f"{doc_id}_0", score=0.85,
metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=None) # is_active 필터로 None
db.execute.return_value = scalar
service = RAGSearchService(embedding, vectordb, db)
results = await service.search("t1", "질문")
assert results is None
def test_rag_build_answer_includes_citation():
"""응답에 출처(문서명·날짜) 포함."""
doc = make_doc("t1", "여권발급안내.txt")
embedding = AsyncMock()
vectordb = AsyncMock()
db = AsyncMock()
from app.services.rag_search import RAGSearchResult
rag_result = RAGSearchResult(
chunk_text="여권 발급은 민원과에서 처리합니다.",
doc=doc,
score=0.78,
)
service = RAGSearchService(embedding, vectordb, db)
answer = service.build_answer("여권 질문", [rag_result])
assert "출처" in answer
assert "여권발급안내.txt" in answer
@pytest.mark.asyncio
async def test_rag_deduplicates_same_doc_chunks():
"""같은 문서의 여러 청크 → 최고 점수 1개만."""
doc_id = str(uuid4())
doc = make_doc("t1")
doc.id = doc_id
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="청크1", doc_id=f"{doc_id}_0", score=0.80,
metadata={"doc_id": doc_id}),
SearchResult(text="청크2", doc_id=f"{doc_id}_1", score=0.75,
metadata={"doc_id": doc_id}),
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
service = RAGSearchService(embedding, vectordb, db)
results = await service.search("t1", "질문")
# 같은 문서는 1개만
doc_ids = [r.doc.id for r in results]
assert len(doc_ids) == len(set(doc_ids))

View File

@@ -0,0 +1,40 @@
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
@pytest.mark.asyncio
async def test_ready_ok_when_all_services_up(client):
"""DB/Redis mock 정상 → GET /ready → 200, ready=True."""
mock_redis = AsyncMock()
mock_redis.ping = AsyncMock(return_value=True)
mock_redis.aclose = AsyncMock()
with patch("app.routers.health.aioredis.from_url", return_value=mock_redis):
response = await client.get("/ready")
assert response.status_code == 200
data = response.json()
assert data["ready"] is True
assert data["checks"]["db"] == "ok"
@pytest.mark.asyncio
async def test_ready_503_when_db_down(client):
"""DB mock 오류 → GET /ready → 503, ready=False."""
mock_redis = AsyncMock()
mock_redis.ping = AsyncMock(return_value=True)
mock_redis.aclose = AsyncMock()
# DB 연결 실패 시뮬레이션
mock_session = AsyncMock()
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
mock_session.execute = AsyncMock(side_effect=Exception("DB connection failed"))
with patch("app.routers.health.aioredis.from_url", return_value=mock_redis), \
patch("app.routers.health.AsyncSessionLocal", return_value=mock_session):
response = await client.get("/ready")
assert response.status_code == 503
data = response.json()
assert data["ready"] is False

View File

@@ -0,0 +1,83 @@
"""
POST /skill/{tenant_slug} — 카카오 스킬 API 테스트.
"""
import pytest
def make_kakao_body(utterance: str, user_id: str = "kakao-user-1") -> dict:
return {
"userRequest": {
"utterance": utterance,
"user": {"id": user_id},
},
"action": {"params": {}},
}
@pytest.mark.asyncio
async def test_skill_returns_kakao_format(client):
"""스킬 응답이 카카오 포맷(version, template.outputs)을 포함한다."""
response = await client.post(
"/skill/test-tenant",
json=make_kakao_body("여권 발급 방법"),
)
assert response.status_code == 200
data = response.json()
assert data["version"] == "2.0"
assert "template" in data
assert "outputs" in data["template"]
assert len(data["template"]["outputs"]) > 0
assert "simpleText" in data["template"]["outputs"][0]
@pytest.mark.asyncio
async def test_skill_tier_d_fallback_returns_200(client):
"""FAQ 없어도 Tier D 응답으로 200 반환."""
response = await client.post(
"/skill/test-tenant",
json=make_kakao_body("이상한 질문"),
)
assert response.status_code == 200
data = response.json()
text = data["template"]["outputs"][0]["simpleText"]["text"]
assert len(text) > 0
@pytest.mark.asyncio
async def test_skill_utterance_text_in_response(client):
"""응답 text 필드가 비어있지 않다."""
response = await client.post(
"/skill/dongducheon",
json=make_kakao_body("담당부서 알려주세요"),
)
assert response.status_code == 200
text = response.json()["template"]["outputs"][0]["simpleText"]["text"]
assert isinstance(text, str)
assert len(text) > 0
@pytest.mark.asyncio
async def test_skill_user_key_is_hashed(client):
"""user_key는 원본 ID가 아닌 해시값(16자리)으로 처리된다 (응답에 노출 안 됨)."""
# 개인정보(원본 카카오 ID)가 응답 바디에 노출되지 않는지 확인
user_id = "real-kakao-id-12345"
response = await client.post(
"/skill/test-tenant",
json=make_kakao_body("질문", user_id=user_id),
)
assert response.status_code == 200
response_text = response.text
assert user_id not in response_text
@pytest.mark.asyncio
async def test_skill_masked_pii_not_in_answer(client):
"""개인정보가 포함된 발화도 정상 처리 (마스킹 후 라우팅)."""
response = await client.post(
"/skill/test-tenant",
json=make_kakao_body("전화번호 010-1234-5678로 연락해주세요"),
)
assert response.status_code == 200
# 마스킹된 발화로 처리되므로 응답은 정상
data = response.json()
assert "version" in data

View File

@@ -0,0 +1,151 @@
"""
Tier A 라우팅 통합 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from app.services.routing import ResponseRouter, RoutingResult
from app.providers.base import SearchResult
def make_router(embedding=None, vectordb=None) -> ResponseRouter:
providers = {}
if embedding:
providers["embedding"] = embedding
if vectordb:
providers["vectordb"] = vectordb
return ResponseRouter(
tenant_config={
"phone_number": "031-860-2000",
"fallback_dept": "민원과",
"tenant_name": "동두천시",
},
providers=providers,
)
@pytest.mark.asyncio
async def test_tier_a_returns_when_faq_found():
"""벡터DB에서 FAQ 매칭 → Tier A 반환."""
faq_id = str(uuid4())
faq = MagicMock()
faq.id = faq_id
faq.answer = "여권은 민원과에서 발급합니다."
faq.question = "여권 발급 방법"
faq.is_active = True
faq.hit_count = 0
faq.updated_at = None
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="여권 발급 방법\n여권은 민원과에서 발급합니다.",
doc_id=f"{faq_id}_0", score=0.92, metadata={"faq_id": faq_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=faq)
db.execute.return_value = scalar
db.get = AsyncMock(return_value=faq)
db.commit = AsyncMock()
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "여권 발급 방법", "user-1", db=db)
assert result.tier == "A"
assert result.source == "faq"
assert result.answer == faq.answer
@pytest.mark.asyncio
async def test_tier_a_skipped_when_no_match():
"""매칭 없으면 Tier D 반환."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[]) # 빈 결과
db = AsyncMock()
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "모르는 질문", "user-1", db=db)
assert result.tier == "D"
assert result.source == "fallback"
@pytest.mark.asyncio
async def test_routing_result_has_faq_id():
"""Tier A 결과에 faq_id가 포함된다."""
faq_id = str(uuid4())
faq = MagicMock()
faq.id = faq_id
faq.answer = "답변"
faq.question = "질문"
faq.is_active = True
faq.hit_count = 0
faq.updated_at = None
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="Q\nA", doc_id=f"{faq_id}_0", score=0.90,
metadata={"faq_id": faq_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=faq)
db.execute.return_value = scalar
db.get = AsyncMock(return_value=faq)
db.commit = AsyncMock()
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "질문", "user-1", db=db)
assert result.faq_id == faq_id
@pytest.mark.asyncio
async def test_routing_result_tier_d_has_fallback_info():
"""Tier D 결과에 담당부서·전화번호가 포함된다."""
router = make_router()
result = await router.route("tenant-1", "알 수 없는 질문", "user-1", db=None)
assert result.tier == "D"
assert "031-860-2000" in result.answer
assert "민원과" in result.answer
@pytest.mark.asyncio
async def test_timeout_returns_tier_d_is_timeout():
"""타임아웃 발생 시 Tier D + is_timeout=True."""
import asyncio
async def slow_embed(texts):
await asyncio.sleep(10) # 타임아웃보다 긴 대기
return [[0.0] * 768]
embedding = AsyncMock()
embedding.embed = AsyncMock(side_effect=slow_embed)
vectordb = AsyncMock()
# 타임아웃을 매우 짧게 설정
router = make_router(embedding=embedding, vectordb=vectordb)
router.TIMEOUT_MS = 50 # 50ms
db = AsyncMock()
result = await router.route("tenant-1", "질문", "user-1", db=db)
assert result.tier == "D"
assert result.is_timeout is True

View File

@@ -0,0 +1,144 @@
"""
Tier B RAG 라우팅 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from datetime import datetime
from app.services.routing import ResponseRouter
from app.providers.base import SearchResult
def make_router(embedding=None, vectordb=None) -> ResponseRouter:
providers = {}
if embedding:
providers["embedding"] = embedding
if vectordb:
providers["vectordb"] = vectordb
return ResponseRouter(
tenant_config={
"phone_number": "031-860-2000",
"fallback_dept": "민원과",
"tenant_name": "동두천시",
},
providers=providers,
)
def make_doc(doc_id: str) -> MagicMock:
doc = MagicMock()
doc.id = doc_id
doc.filename = "안내문.txt"
doc.is_active = True
doc.published_at = datetime(2026, 3, 1)
return doc
@pytest.mark.asyncio
async def test_tier_b_returns_when_doc_found():
"""문서 RAG 매칭 → Tier B 반환."""
doc_id = str(uuid4())
doc = make_doc(doc_id)
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="여권은 민원과에서 발급합니다.", doc_id=f"{doc_id}_0",
score=0.75, metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "여권 발급 방법", "user-1", db=db)
assert result.tier == "B"
assert result.source == "rag"
@pytest.mark.asyncio
async def test_tier_b_skipped_when_no_doc_match():
"""문서 매칭 없으면 Tier D."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[])
db = AsyncMock()
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "모르는 질문", "user-1", db=db)
assert result.tier == "D"
@pytest.mark.asyncio
async def test_tier_b_result_has_doc_name():
"""Tier B 결과에 doc_name 포함."""
doc_id = str(uuid4())
doc = make_doc(doc_id)
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="내용", doc_id=f"{doc_id}_0", score=0.72,
metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "질문", "user-1", db=db)
assert result.doc_name is not None
@pytest.mark.asyncio
async def test_tier_a_takes_priority_over_tier_b():
"""FAQ 유사도 ≥ 0.85 → Tier A (Tier B 미실행)."""
faq_id = str(uuid4())
faq = MagicMock()
faq.id = faq_id
faq.answer = "FAQ 답변"
faq.question = "질문"
faq.is_active = True
faq.hit_count = 0
faq.updated_at = None
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
# FAQ 검색: 높은 유사도
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="Q\nA", doc_id=f"{faq_id}_0", score=0.92,
metadata={"faq_id": faq_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=faq)
db.execute.return_value = scalar
db.get = AsyncMock(return_value=faq)
db.commit = AsyncMock()
router = make_router(embedding=embedding, vectordb=vectordb)
result = await router.route("tenant-1", "질문", "user-1", db=db)
# FAQ가 먼저 매칭되면 Tier A
assert result.tier == "A"

View File

@@ -0,0 +1,164 @@
"""
Tier C — LLM 기반 재서술 라우팅 테스트.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4
from datetime import datetime
from app.services.routing import ResponseRouter
from app.providers.llm import NullLLMProvider
from app.providers.base import SearchResult
def make_doc(doc_id: str) -> MagicMock:
doc = MagicMock()
doc.id = doc_id
doc.filename = "안내문.txt"
doc.is_active = True
doc.published_at = datetime(2026, 3, 1)
return doc
def make_router(embedding=None, vectordb=None, llm=None) -> ResponseRouter:
providers = {}
if embedding:
providers["embedding"] = embedding
if vectordb:
providers["vectordb"] = vectordb
if llm:
providers["llm"] = llm
return ResponseRouter(
tenant_config={
"phone_number": "031-860-2000",
"fallback_dept": "민원과",
"tenant_name": "동두천시",
},
providers=providers,
)
@pytest.mark.asyncio
async def test_tier_c_returns_llm_answer():
"""LLM 활성화 + RAG 근거 있음 → Tier C 반환."""
doc_id = str(uuid4())
doc = make_doc(doc_id)
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="여권은 민원과에서 발급합니다.", doc_id=f"{doc_id}_0",
score=0.75, metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
llm = AsyncMock()
llm.generate = AsyncMock(return_value="여권 발급은 민원과(031-860-2000)에서 신청하시면 됩니다.")
router = make_router(embedding=embedding, vectordb=vectordb, llm=llm)
result = await router.route("tenant-1", "여권 어디서 받아요", "user-1", db=db)
assert result.tier == "C"
assert result.source == "llm"
assert "여권" in result.answer
@pytest.mark.asyncio
async def test_tier_c_skipped_when_llm_is_null():
"""NullLLMProvider → Tier C 스킵 → Tier B 또는 D."""
doc_id = str(uuid4())
doc = make_doc(doc_id)
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="내용", doc_id=f"{doc_id}_0", score=0.75,
metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
llm = NullLLMProvider() # none 기본값
router = make_router(embedding=embedding, vectordb=vectordb, llm=llm)
result = await router.route("tenant-1", "질문", "user-1", db=db)
# NullLLM → Tier C 스킵 → Tier B (RAG 매칭 있으므로)
assert result.tier in ("B", "D")
assert result.tier != "C"
@pytest.mark.asyncio
async def test_tier_c_skipped_when_no_rag_context():
"""RAG 근거 없음 → LLM 미호출 (할루시네이션 방지)."""
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[]) # 근거 없음
db = AsyncMock()
llm = AsyncMock()
llm.generate = AsyncMock(return_value="LLM 답변")
router = make_router(embedding=embedding, vectordb=vectordb, llm=llm)
result = await router.route("tenant-1", "질문", "user-1", db=db)
# 근거 없으면 LLM 미호출 → Tier D
assert result.tier == "D"
llm.generate.assert_not_called()
@pytest.mark.asyncio
async def test_tier_c_falls_back_to_d_when_llm_fails():
"""LLM 실패(None 반환) → Tier D 폴백."""
doc_id = str(uuid4())
doc = make_doc(doc_id)
embedding = AsyncMock()
embedding.embed = AsyncMock(return_value=[[0.1] * 768])
vectordb = AsyncMock()
vectordb.search = AsyncMock(return_value=[
SearchResult(text="내용", doc_id=f"{doc_id}_0", score=0.75,
metadata={"doc_id": doc_id})
])
db = AsyncMock()
db.execute = AsyncMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=doc)
db.execute.return_value = scalar
llm = AsyncMock()
llm.generate = AsyncMock(return_value=None) # LLM 실패
router = make_router(embedding=embedding, vectordb=vectordb, llm=llm)
result = await router.route("tenant-1", "질문", "user-1", db=db)
# LLM None → Tier C 없음 → Tier B (RAG 있으므로)
assert result.tier in ("B", "D")
assert result.tier != "C"
@pytest.mark.asyncio
async def test_llm_not_called_without_context_chunks():
"""AnthropicLLMProvider: context_chunks 비어있으면 None."""
from app.providers.llm_anthropic import AnthropicLLMProvider
provider = AnthropicLLMProvider(api_key="test-key")
result = await provider.generate("system", "user", context_chunks=[])
assert result is None