Files
conai/backend/app/services/rag_service.py
sinmb79 2a4950d8a0 feat: CONAI Phase 1 MVP 초기 구현
소형 건설업체(100억 미만)를 위한 AI 기반 토목공사 통합관리 플랫폼

Backend (FastAPI):
- SQLAlchemy 모델 13개 (users, projects, wbs, tasks, daily_reports, reports, inspections, quality, weather, permits, rag, settings)
- API 라우터 11개 (auth, projects, tasks, daily_reports, reports, inspections, weather, rag, kakao, permits, settings)
- Services: Claude AI 래퍼, CPM Gantt 계산, 기상청 API, RAG(pgvector), 카카오 Skill API
- Alembic 마이그레이션 (pgvector 포함)
- pytest 테스트 (CPM, 날씨 경보)

Frontend (Next.js 15):
- 11개 페이지 (대시보드, 프로젝트, Gantt, 일보, 검측, 품질, 날씨, 인허가, RAG, 설정)
- TanStack Query + Zustand + Tailwind CSS

인프라:
- Docker Compose (PostgreSQL pgvector + backend + frontend)
- 한국어 README 및 설치 가이드

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 20:06:36 +09:00

126 lines
4.0 KiB
Python

"""
RAG (Retrieval-Augmented Generation) service.
Embeds questions, retrieves relevant chunks, and generates answers with Claude.
"""
import httpx
from sqlalchemy import bindparam, select, text
from sqlalchemy.ext.asyncio import AsyncSession

from app.config import settings
from app.models.rag import RagChunk, RagSource
from app.services.ai_engine import complete
from app.services.prompts.rag import SYSTEM_PROMPT, build_prompt
async def embed_text(text_input: str) -> list[float]:
    """Return the embedding vector for *text_input*.

    Prefers Voyage AI when its key is configured; otherwise falls back
    to OpenAI.

    Raises:
        ValueError: if neither embedding API key is configured.
    """
    # Guard-clause dispatch: first configured provider wins.
    if settings.VOYAGE_API_KEY:
        return await _embed_voyage(text_input)
    if settings.OPENAI_API_KEY:
        return await _embed_openai(text_input)
    raise ValueError(
        "임베딩 API 키가 설정되지 않았습니다 (VOYAGE_API_KEY 또는 OPENAI_API_KEY)"
    )
async def _embed_voyage(text_input: str) -> list[float]:
    """Call the Voyage AI embeddings endpoint and return the vector."""
    headers = {"Authorization": f"Bearer {settings.VOYAGE_API_KEY}"}
    payload = {"model": settings.EMBEDDING_MODEL, "input": text_input}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(
            "https://api.voyageai.com/v1/embeddings",
            headers=headers,
            json=payload,
        )
    # Surface HTTP errors as exceptions before touching the body.
    response.raise_for_status()
    return response.json()["data"][0]["embedding"]
async def _embed_openai(text_input: str) -> list[float]:
    """Call the OpenAI embeddings endpoint and return the vector."""
    headers = {"Authorization": f"Bearer {settings.OPENAI_API_KEY}"}
    payload = {"model": "text-embedding-3-small", "input": text_input}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(
            "https://api.openai.com/v1/embeddings",
            headers=headers,
            json=payload,
        )
    # Surface HTTP errors as exceptions before touching the body.
    response.raise_for_status()
    return response.json()["data"][0]["embedding"]
async def retrieve_chunks(
    db: AsyncSession,
    question_embedding: list[float],
    top_k: int = 5,
    source_types: list[str] | None = None,
) -> list[dict]:
    """Retrieve the most relevant chunks using pgvector cosine similarity.

    Args:
        db: Async SQLAlchemy session bound to the pgvector database.
        question_embedding: Embedding vector of the user's question.
        top_k: Maximum number of chunks to return.
        source_types: Optional whitelist of ``rag_sources.source_type`` values.

    Returns:
        List of dicts with chunk id/content/metadata, source title/type, and
        a ``relevance_score`` in [0, 1] (1 - cosine distance), best first.
    """
    # pgvector accepts the vector as a string literal like "[0.1,0.2,...]".
    embedding_str = "[" + ",".join(str(x) for x in question_embedding) + "]"

    # SECURITY: bind every caller-supplied value (embedding, top_k,
    # source_types) as a query parameter instead of interpolating it into
    # the SQL text — the original f-string build was injectable via
    # source_types. The only f-string piece left is a fixed, literal clause.
    params: dict = {"embedding": embedding_str, "top_k": top_k}
    source_filter = ""
    if source_types:
        source_filter = "AND rs.source_type IN :source_types"
        params["source_types"] = source_types

    query = text(f"""
        SELECT
            rc.id,
            rc.content,
            rc.metadata,
            rs.title,
            rs.source_type,
            1 - (rc.embedding <=> (:embedding)::vector) AS relevance_score
        FROM rag_chunks rc
        JOIN rag_sources rs ON rs.id = rc.source_id
        WHERE rc.embedding IS NOT NULL
        {source_filter}
        ORDER BY rc.embedding <=> (:embedding)::vector
        LIMIT :top_k
    """)
    if source_types:
        # Expanding bindparam renders the list as an IN (...) tuple safely.
        query = query.bindparams(bindparam("source_types", expanding=True))

    result = await db.execute(query, params)
    rows = result.fetchall()
    return [
        {
            "id": str(row.id),
            "content": row.content,
            "metadata": row.metadata,
            "title": row.title,
            "source_type": row.source_type,
            "relevance_score": float(row.relevance_score),
        }
        for row in rows
    ]
async def ask(
    db: AsyncSession,
    question: str,
    top_k: int = 5,
    source_types: list[str] | None = None,
) -> dict:
    """Run the full RAG pipeline: embed -> retrieve -> generate.

    Returns a dict with the original question, the generated (or fallback)
    answer, and the list of source chunks used.
    """
    question_vector = await embed_text(question)
    retrieved = await retrieve_chunks(db, question_vector, top_k, source_types)

    # Nothing relevant in the index: answer with a canned guidance message.
    if not retrieved:
        return {
            "question": question,
            "answer": "관련 자료를 찾을 수 없습니다. 더 구체적인 질문을 입력하거나, 관련 자료가 업로드되었는지 확인해주세요.",
            "sources": [],
        }

    user_prompt = build_prompt(question, retrieved)
    generated = await complete(
        messages=[{"role": "user", "content": user_prompt}],
        system=SYSTEM_PROMPT,
        temperature=0.5,
    )
    return {"question": question, "answer": generated, "sources": retrieved}