feat: add Gemini-powered birdseye rendering

2026-04-04 19:29:27 +09:00
parent 800c7b6fa7
commit 5b96be3104
20 changed files with 1006 additions and 396 deletions
--- a/civilplan_mcp/__init__.py
+++ b/civilplan_mcp/__init__.py
@@ -1,3 +1,3 @@
 __all__ = ["__version__"]

-__version__ = "1.0.0"
+__version__ = "2.0.0"
--- a/civilplan_mcp/config.py
+++ b/civilplan_mcp/config.py
@@ -28,7 +28,7 @@ def _load_secure_api_keys(path: Path) -> dict[str, str]:

 class Settings(BaseModel):
    app_name: str = "civilplan_mcp"
-    version: str = "1.0.0"
+    version: str = "2.0.0"
    host: str = "127.0.0.1"
    port: int = 8765
    http_path: str = "/mcp"
@@ -38,6 +38,7 @@ class Settings(BaseModel):
    key_store_path: Path = Field(default_factory=default_key_store_path)
    data_go_kr_api_key: str = Field(default_factory=lambda: os.getenv("DATA_GO_KR_API_KEY", ""))
    vworld_api_key: str = Field(default_factory=lambda: os.getenv("VWORLD_API_KEY", ""))
+    gemini_api_key: str = Field(default_factory=lambda: os.getenv("GEMINI_API_KEY", ""))


@lru_cache(maxsize=1)
@@ -50,6 +51,8 @@ def get_settings() -> Settings:
        settings.data_go_kr_api_key = secure_keys.get("DATA_GO_KR_API_KEY", "")
    if not settings.vworld_api_key:
        settings.vworld_api_key = secure_keys.get("VWORLD_API_KEY", "")
+    if not settings.gemini_api_key:
+        settings.gemini_api_key = secure_keys.get("GEMINI_API_KEY", "")

    settings.output_dir.mkdir(parents=True, exist_ok=True)
    return settings
@@ -62,4 +65,6 @@ def check_api_keys() -> list[str]:
        missing.append("DATA_GO_KR_API_KEY")
    if not settings.vworld_api_key:
        missing.append("VWORLD_API_KEY")
+    if not settings.gemini_api_key:
+        missing.append("GEMINI_API_KEY")
    return missing
--- a/civilplan_mcp/prompts/__init__.py
+++ b/civilplan_mcp/prompts/__init__.py
@@ -0,0 +1,3 @@
+from civilplan_mcp.prompts.birdseye_templates import DOMAIN_PROMPTS, VIEW_INSTRUCTIONS, build_prompt
+
+__all__ = ["DOMAIN_PROMPTS", "VIEW_INSTRUCTIONS", "build_prompt"]
--- a/civilplan_mcp/prompts/birdseye_templates.py
+++ b/civilplan_mcp/prompts/birdseye_templates.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+# Per-domain guidance sentences, keyed by project type. build_prompt() inserts the
+# matching entry under "Domain guidance:" and uses "mixed" for unrecognized keys.
+DOMAIN_PROMPTS: dict[str, str] = {
+    "road": (
+        "Focus on the road alignment, lane markings, shoulders, drainage channels, utility corridors, "
+        "guard rails, and the surrounding Korean rural or suburban context."
+    ),
+    "building": (
+        "Focus on the building massing, facade materials, rooftop equipment, parking, pedestrian circulation, "
+        "and the surrounding Korean urban block."
+    ),
+    "water": (
+        "Focus on pipeline routing, manholes, pump stations, treatment structures, trench alignment, "
+        "and road-side utility coordination."
+    ),
+    "river": (
+        "Focus on embankments, flood-control structures, riprap, levee walks, bridge crossings, "
+        "and natural riparian vegetation."
+    ),
+    "landscape": (
+        "Focus on planting composition, trails, plazas, seating, play areas, water features, "
+        "and seasonal Korean vegetation."
+    ),
+    "mixed": (
+        "Show a comprehensive development site where roads, buildings, utility systems, and landscape work together "
+        "as one coordinated Korean construction project."
+    ),
+}
+
+# Opening instruction for each supported view type. build_prompt() places the
+# matching entry at the top of the prompt and uses "birdseye" for unrecognized keys.
+VIEW_INSTRUCTIONS: dict[str, str] = {
+    "birdseye": (
+        "Create a photorealistic bird's-eye view rendering with an aerial camera angle around 45 to 60 degrees, "
+        "covering the full project extent and nearby context."
+    ),
+    "perspective": (
+        "Create a photorealistic perspective rendering from a representative human-scale viewpoint, "
+        "showing how the project feels on the ground."
+    ),
+}
+
+
+def build_prompt(
+    *,
+    view_type: str,
+    project_type: str,
+    project_summary: str,
+    details: dict[str, Any],
+) -> str:
+    """Assemble the full text prompt for one rendering request.
+
+    Args:
+        view_type: Key into ``VIEW_INSTRUCTIONS``; unknown keys use "birdseye".
+        project_type: Key into ``DOMAIN_PROMPTS``; unknown keys use "mixed".
+        project_summary: Free-text description placed under "Project summary:".
+        details: Technical attributes rendered as one bullet per entry. Entries
+            whose value equals ``None``, ``""``, ``[]`` or ``{}`` are omitted.
+
+    Returns:
+        The prompt: view instruction, summary, details, domain guidance and fixed
+        style requirements, separated by blank lines.
+    """
+    opening = VIEW_INSTRUCTIONS.get(view_type, VIEW_INSTRUCTIONS["birdseye"])
+    guidance = DOMAIN_PROMPTS.get(project_type, DOMAIN_PROMPTS["mixed"])
+
+    empty_values = (None, "", [], {})
+    bullets: list[str] = []
+    for name, item in details.items():
+        if item in empty_values:
+            continue
+        bullets.append(f"- {name}: {item}")
+    if not bullets:
+        # Keep the section non-empty so the prompt layout stays the same.
+        bullets.append("- No additional technical details provided.")
+    bullet_text = "\n".join(bullets)
+
+    style_section = (
+        "Style requirements:\n"
+        "- Professional architectural visualization for a Korean civil or building project.\n"
+        "- Clear daytime weather, realistic materials, and readable spatial hierarchy.\n"
+        "- Include surrounding terrain, access roads, and scale cues where appropriate.\n"
+        "- Avoid people-heavy staging, exaggerated concept-art effects, or fantasy aesthetics."
+    )
+    sections = [
+        f"{opening}",
+        f"Project summary:\n{project_summary}",
+        f"Technical details:\n{bullet_text}",
+        f"Domain guidance:\n{guidance}",
+        style_section,
+    ]
+    return "\n\n".join(sections)
--- a/civilplan_mcp/server.py
+++ b/civilplan_mcp/server.py
@@ -16,6 +16,7 @@ from civilplan_mcp import __version__
 from civilplan_mcp.config import check_api_keys, get_settings
 from civilplan_mcp.tools.benchmark_validator import validate_against_benchmark
 from civilplan_mcp.tools.bid_type_selector import select_bid_type
+from civilplan_mcp.tools.birdseye_generator import generate_birdseye_view
 from civilplan_mcp.tools.boq_generator import generate_boq_excel
 from civilplan_mcp.tools.budget_report_generator import generate_budget_report
 from civilplan_mcp.tools.doc_generator import generate_investment_doc
@@ -113,6 +114,7 @@ def build_mcp() -> FastMCP:
    _register_read_tool(app, "civilplan_validate_against_benchmark", validate_against_benchmark)
    _register_write_tool(app, "civilplan_generate_budget_report", generate_budget_report)
    _register_write_tool(app, "civilplan_generate_dxf_drawing", generate_dxf_drawing)
+    _register_write_tool(app, "civilplan_generate_birdseye_view", generate_birdseye_view)
    return app


--- a/civilplan_mcp/services/__init__.py
+++ b/civilplan_mcp/services/__init__.py
@@ -0,0 +1,3 @@
+from civilplan_mcp.services.gemini_image import GeminiImageService
+
+__all__ = ["GeminiImageService"]
--- a/civilplan_mcp/services/gemini_image.py
+++ b/civilplan_mcp/services/gemini_image.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from PIL import Image as PILImage
+
+try:
+    from google import genai
+    from google.genai import types as genai_types
+except ImportError:  # pragma: no cover - exercised in tests via runtime guard
+    genai = None
+    genai_types = None
+
+
+logger = logging.getLogger(__name__)
+
+
+class GeminiImageService:
+    """Thin wrapper around a google-genai client that generates and saves one image.
+
+    A ready-made ``client`` may be injected (for example in tests); otherwise a
+    client is created from ``api_key``.
+    """
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model: str = "gemini-3-pro-image-preview",
+        client: Any | None = None,
+    ) -> None:
+        """Store the API key and model name and resolve the client to use.
+
+        Raises:
+            RuntimeError: If no client is injected and google-genai is not installed.
+        """
+        self.api_key = api_key
+        self.model = model
+        # Compare against None so an injected client is honored even if it is falsy.
+        self._client = client if client is not None else self._build_client()
+
+    def _build_client(self) -> Any:
+        """Create a google-genai client, or raise if the package is unavailable."""
+        if genai is None:
+            raise RuntimeError("google-genai is not installed. Install it to use GeminiImageService.")
+        return genai.Client(api_key=self.api_key)
+
+    def _build_config(self, *, aspect_ratio: str, image_size: str) -> Any:
+        """Build the generation config requesting text and image output.
+
+        Uses the SDK's ``ImageConfig`` / ``GenerateContentConfig`` classes when they
+        are available and callable; otherwise falls back to plain dictionaries with
+        the same keys.
+        """
+        modalities = ["TEXT", "IMAGE"]
+        image_settings = {
+            "aspect_ratio": aspect_ratio,
+            "image_size": image_size,
+        }
+
+        # getattr on None yields the default, which covers a missing SDK as well.
+        image_config_factory = getattr(genai_types, "ImageConfig", None)
+        generate_config_factory = getattr(genai_types, "GenerateContentConfig", None)
+
+        image_config = (
+            image_config_factory(**image_settings) if callable(image_config_factory) else image_settings
+        )
+
+        if callable(generate_config_factory):
+            return generate_config_factory(
+                response_modalities=modalities,
+                image_config=image_config,
+            )
+
+        return {
+            "response_modalities": modalities,
+            "image_config": image_config,
+        }
+
+    @staticmethod
+    def _extract_parts(response: Any) -> list[Any]:
+        """Return the response's content parts as a list.
+
+        Prefers ``response.parts``; otherwise returns the parts of the first
+        candidate whose ``content.parts`` is non-empty. Returns ``[]`` if none.
+        """
+        direct_parts = getattr(response, "parts", None)
+        if direct_parts:
+            return list(direct_parts)
+
+        candidates = getattr(response, "candidates", None) or []
+        for candidate in candidates:
+            candidate_parts = getattr(getattr(candidate, "content", None), "parts", None)
+            if candidate_parts:
+                return list(candidate_parts)
+        return []
+
+    def generate_image(
+        self,
+        *,
+        prompt: str,
+        output_path: str,
+        reference_image_path: str | None = None,
+        aspect_ratio: str = "16:9",
+        image_size: str = "2K",
+    ) -> dict[str, str]:
+        """Generate one image from ``prompt`` and save it to ``output_path``.
+
+        Args:
+            prompt: Text prompt sent to the model.
+            output_path: Destination file; parent directories are created.
+            reference_image_path: Optional image file sent along with the prompt.
+            aspect_ratio: Aspect ratio passed through to the image config.
+            image_size: Image size passed through to the image config.
+
+        Returns:
+            ``{"status": "success", "path": ...}`` when an image part was saved,
+            otherwise ``{"status": "error", "error": ...}``. Exceptions are logged
+            and reported through the error form rather than raised.
+        """
+        reference_image: PILImage.Image | None = None
+
+        try:
+            contents: list[Any] = [prompt]
+            if reference_image_path:
+                # Kept open until the request finishes; closed in ``finally``.
+                reference_image = PILImage.open(reference_image_path)
+                contents.append(reference_image)
+
+            response = self._client.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=self._build_config(aspect_ratio=aspect_ratio, image_size=image_size),
+            )
+
+            # Extract once; the same list serves the image search and the text fallback.
+            parts = self._extract_parts(response)
+
+            for part in parts:
+                if getattr(part, "inline_data", None) is not None and hasattr(part, "as_image"):
+                    output = Path(output_path)
+                    output.parent.mkdir(parents=True, exist_ok=True)
+                    part.as_image().save(str(output))
+                    return {"status": "success", "path": str(output)}
+
+            # No image came back: surface any text the model returned to aid diagnosis.
+            text_parts = [str(part.text).strip() for part in parts if getattr(part, "text", None)]
+            message = "No image in API response."
+            if text_parts:
+                message = f"{message} {' '.join(text_parts)}"
+            return {"status": "error", "error": message}
+        except Exception as exc:
+            logger.exception("Gemini image generation failed.")
+            return {"status": "error", "error": str(exc)}
+        finally:
+            if reference_image is not None:
+                reference_image.close()
--- a/civilplan_mcp/setup_keys.py
+++ b/civilplan_mcp/setup_keys.py
@@ -42,11 +42,13 @@ def main(argv: list[str] | None = None) -> int:

    data_go_kr_api_key = _prompt_value("DATA_GO_KR_API_KEY", imported.get("DATA_GO_KR_API_KEY", ""))
    vworld_api_key = _prompt_value("VWORLD_API_KEY", imported.get("VWORLD_API_KEY", ""))
+    gemini_api_key = _prompt_value("GEMINI_API_KEY", imported.get("GEMINI_API_KEY", ""))

    target = save_api_keys(
        {
            "DATA_GO_KR_API_KEY": data_go_kr_api_key,
            "VWORLD_API_KEY": vworld_api_key,
+            "GEMINI_API_KEY": gemini_api_key,
        }
    )

--- a/civilplan_mcp/tools/birdseye_generator.py
+++ b/civilplan_mcp/tools/birdseye_generator.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from civilplan_mcp.config import get_settings
+from civilplan_mcp.models import ProjectDomain
+from civilplan_mcp.prompts.birdseye_templates import build_prompt
+from civilplan_mcp.services.gemini_image import GeminiImageService
+from civilplan_mcp.tools._base import wrap_response
+
+
+logger = logging.getLogger(__name__)
+
+# Maps Korean domain labels to the project-type keys passed to build_prompt().
+# In order: civil/road, building, civil/water supply and sewerage, civil/river,
+# landscape, mixed.
+# NOTE(review): keys presumably mirror the ProjectDomain values; confirm in models.
+DOMAIN_TO_PROJECT_TYPE = {
+    "토목_도로": "road",
+    "건축": "building",
+    "토목_상하수도": "water",
+    "토목_하천": "river",
+    "조경": "landscape",
+    "복합": "mixed",
+}
+
+
+def _domain_to_project_type(domain: str) -> str:
+    """Translate a domain label into a prompt project type, defaulting to "mixed"."""
+    try:
+        return DOMAIN_TO_PROJECT_TYPE[domain]
+    except KeyError:
+        return "mixed"
+
+
+def _resolve_domain(domain: str | None) -> ProjectDomain:
+    """Convert a raw domain value to ``ProjectDomain``.
+
+    A missing or empty value, or one that ``ProjectDomain`` rejects with
+    ``ValueError``, resolves to ``ProjectDomain.복합``.
+    """
+    try:
+        raw_value = domain if domain else ProjectDomain.복합.value
+        resolved = ProjectDomain(raw_value)
+    except ValueError:
+        resolved = ProjectDomain.복합
+    return resolved
+
+
+def svg_to_png(svg_content: str, output_path: str) -> str:
+    """Render SVG markup to a PNG file at ``output_path`` and return that path.
+
+    cairosvg is imported at call time, so it is only required when this runs.
+    """
+    from cairosvg import svg2png
+
+    svg_bytes = svg_content.encode("utf-8")
+    svg2png(bytestring=svg_bytes, write_to=output_path)
+    return output_path
+
+
+def _safe_file_stem(project_id: Any, default: str = "birdseye-render") -> str:
+    """Return ``project_id`` as a filename-safe stem, or ``default`` if it is empty."""
+    import re
+
+    text = str(project_id).strip() if project_id else ""
+    # Path separators and other characters that are invalid in file names become
+    # "_" so a caller-supplied ID cannot redirect output outside the output directory.
+    return re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", text) or default
+
+
+def generate_birdseye_view(
+    *,
+    project_summary: str,
+    project_spec: dict[str, Any],
+    svg_drawing: str | None = None,
+    resolution: str = "2K",
+) -> dict[str, Any]:
+    """Render bird's-eye and perspective images for a project via Gemini.
+
+    Args:
+        project_summary: Free-text project description used in both prompts.
+        project_spec: Project attributes. Reads ``domain``, ``project_id``,
+            ``output_dir``, ``road`` (dict) and ``terrain``, ``region``,
+            ``utilities``, ``year_start``, ``year_end``.
+        svg_drawing: Optional SVG markup converted to a PNG reference image.
+            A failed conversion is logged and rendering continues without it.
+        resolution: Image size passed to the image service.
+
+    Returns:
+        The ``wrap_response`` result for a payload whose ``status`` is "success"
+        (both views saved), "partial" (one saved) or "error" (none saved, the API
+        key is missing, or the image service could not be created).
+    """
+    settings = get_settings()
+    domain = _resolve_domain(project_spec.get("domain"))
+    project_id = project_spec.get("project_id", "birdseye-render")
+
+    if not settings.gemini_api_key:
+        return wrap_response(
+            {
+                "status": "error",
+                "project_id": project_id,
+                "error": "GEMINI_API_KEY is not configured. Add it to .env or store it with python setup_keys.py.",
+            },
+            domain,
+        )
+
+    # Build the service before touching the filesystem so a missing SDK is
+    # reported as a structured error, like the missing-key case above.
+    try:
+        service = GeminiImageService(api_key=settings.gemini_api_key)
+    except Exception as exc:
+        logger.exception("Failed to initialize the Gemini image service.")
+        return wrap_response(
+            {
+                "status": "error",
+                "project_id": project_id,
+                "error": str(exc),
+            },
+            domain,
+        )
+
+    output_dir = Path(project_spec.get("output_dir") or settings.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # The response keeps the caller's project_id; only file names use the safe stem.
+    file_stem = _safe_file_stem(project_id)
+
+    details: dict[str, Any] = {}
+    if isinstance(project_spec.get("road"), dict):
+        details.update({key: value for key, value in project_spec["road"].items() if value is not None})
+    for key in ("terrain", "region", "utilities", "year_start", "year_end"):
+        value = project_spec.get(key)
+        if value not in (None, "", [], {}):
+            details[key] = value
+
+    reference_image_path: str | None = None
+    if svg_drawing:
+        try:
+            reference_image_path = svg_to_png(svg_drawing, str(output_dir / f"{file_stem}_reference.png"))
+        except Exception as exc:
+            # Best effort: the reference image is optional, so render without it.
+            logger.warning("Failed to convert SVG reference for birdseye render: %s", exc)
+
+    project_type = _domain_to_project_type(domain.value)
+
+    # Both views share every setting except the view type and the output file name.
+    view_results: dict[str, dict[str, str]] = {}
+    for view_type in ("birdseye", "perspective"):
+        view_results[view_type] = service.generate_image(
+            prompt=build_prompt(
+                view_type=view_type,
+                project_type=project_type,
+                project_summary=project_summary,
+                details=details,
+            ),
+            output_path=str(output_dir / f"{file_stem}_{view_type}.png"),
+            reference_image_path=reference_image_path,
+            aspect_ratio="16:9",
+            image_size=resolution,
+        )
+
+    succeeded = [result["status"] == "success" for result in view_results.values()]
+    if all(succeeded):
+        status = "success"
+    elif any(succeeded):
+        status = "partial"
+    else:
+        status = "error"
+
+    return wrap_response(
+        {
+            "status": status,
+            "project_id": project_id,
+            "model": service.model,
+            "resolution": resolution,
+            "reference_image_path": reference_image_path,
+            "birdseye_view": view_results["birdseye"],
+            "perspective_view": view_results["perspective"],
+        },
+        domain,
+    )