feat: add Gemini-powered birdseye rendering

This commit is contained in:
sinmb79
2026-04-04 19:29:27 +09:00
parent 800c7b6fa7
commit 5b96be3104
20 changed files with 1006 additions and 396 deletions

View File

@@ -1,3 +1,3 @@
__all__ = ["__version__"]
__version__ = "1.0.0"
__version__ = "2.0.0"

View File

@@ -28,7 +28,7 @@ def _load_secure_api_keys(path: Path) -> dict[str, str]:
class Settings(BaseModel):
app_name: str = "civilplan_mcp"
version: str = "1.0.0"
version: str = "2.0.0"
host: str = "127.0.0.1"
port: int = 8765
http_path: str = "/mcp"
@@ -38,6 +38,7 @@ class Settings(BaseModel):
key_store_path: Path = Field(default_factory=default_key_store_path)
data_go_kr_api_key: str = Field(default_factory=lambda: os.getenv("DATA_GO_KR_API_KEY", ""))
vworld_api_key: str = Field(default_factory=lambda: os.getenv("VWORLD_API_KEY", ""))
gemini_api_key: str = Field(default_factory=lambda: os.getenv("GEMINI_API_KEY", ""))
@lru_cache(maxsize=1)
@@ -50,6 +51,8 @@ def get_settings() -> Settings:
settings.data_go_kr_api_key = secure_keys.get("DATA_GO_KR_API_KEY", "")
if not settings.vworld_api_key:
settings.vworld_api_key = secure_keys.get("VWORLD_API_KEY", "")
if not settings.gemini_api_key:
settings.gemini_api_key = secure_keys.get("GEMINI_API_KEY", "")
settings.output_dir.mkdir(parents=True, exist_ok=True)
return settings
@@ -62,4 +65,6 @@ def check_api_keys() -> list[str]:
missing.append("DATA_GO_KR_API_KEY")
if not settings.vworld_api_key:
missing.append("VWORLD_API_KEY")
if not settings.gemini_api_key:
missing.append("GEMINI_API_KEY")
return missing

View File

@@ -0,0 +1,3 @@
from civilplan_mcp.prompts.birdseye_templates import DOMAIN_PROMPTS, VIEW_INSTRUCTIONS, build_prompt
__all__ = ["DOMAIN_PROMPTS", "VIEW_INSTRUCTIONS", "build_prompt"]

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
from typing import Any
# Domain-specific guidance paragraphs inserted into the image prompt.
# Keys are short project-type names; build_prompt() looks its `project_type`
# up here and falls back to the "mixed" entry when the key is unknown.
DOMAIN_PROMPTS: dict[str, str] = {
    # Road projects.
    "road": (
        "Focus on the road alignment, lane markings, shoulders, drainage channels, utility corridors, "
        "guard rails, and the surrounding Korean rural or suburban context."
    ),
    # Building projects.
    "building": (
        "Focus on the building massing, facade materials, rooftop equipment, parking, pedestrian circulation, "
        "and the surrounding Korean urban block."
    ),
    # Water / pipeline projects.
    "water": (
        "Focus on pipeline routing, manholes, pump stations, treatment structures, trench alignment, "
        "and road-side utility coordination."
    ),
    # River projects.
    "river": (
        "Focus on embankments, flood-control structures, riprap, levee walks, bridge crossings, "
        "and natural riparian vegetation."
    ),
    # Landscape projects.
    "landscape": (
        "Focus on planting composition, trails, plazas, seating, play areas, water features, "
        "and seasonal Korean vegetation."
    ),
    # Combined sites; also the fallback used by build_prompt() for unknown types.
    "mixed": (
        "Show a comprehensive development site where roads, buildings, utility systems, and landscape work together "
        "as one coordinated Korean construction project."
    ),
}
# Opening instruction of the image prompt, keyed by view type.
# build_prompt() falls back to the "birdseye" entry for an unknown view type.
VIEW_INSTRUCTIONS: dict[str, str] = {
    # Aerial overview of the whole site.
    "birdseye": (
        "Create a photorealistic bird's-eye view rendering with an aerial camera angle around 45 to 60 degrees, "
        "covering the full project extent and nearby context."
    ),
    # Ground-level, human-scale view.
    "perspective": (
        "Create a photorealistic perspective rendering from a representative human-scale viewpoint, "
        "showing how the project feels on the ground."
    ),
}
def build_prompt(
    *,
    view_type: str,
    project_type: str,
    project_summary: str,
    details: dict[str, Any],
) -> str:
    """Assemble the full text prompt for one rendering request.

    Args:
        view_type: Key into ``VIEW_INSTRUCTIONS``; unknown keys use the
            ``"birdseye"`` instruction.
        project_type: Key into ``DOMAIN_PROMPTS``; unknown keys use the
            ``"mixed"`` guidance.
        project_summary: Free-text description placed under "Project summary".
        details: Extra technical facts rendered as ``- key: value`` bullets.
            Entries whose value is ``None``, ``""``, ``[]`` or ``{}`` are
            left out.

    Returns:
        The prompt: view instruction, summary, technical details, domain
        guidance and fixed style requirements, separated by blank lines.
    """
    opening = VIEW_INSTRUCTIONS.get(view_type, VIEW_INSTRUCTIONS["birdseye"])
    guidance = DOMAIN_PROMPTS.get(project_type, DOMAIN_PROMPTS["mixed"])

    # Collect one bullet per detail that actually carries information.
    blank_values = (None, "", [], {})
    bullets: list[str] = []
    for name, item in details.items():
        if item in blank_values:
            continue
        bullets.append(f"- {name}: {item}")
    if not bullets:
        bullets.append("- No additional technical details provided.")
    bullet_text = "\n".join(bullets)

    style_block = (
        "Style requirements:\n"
        "- Professional architectural visualization for a Korean civil or building project.\n"
        "- Clear daytime weather, realistic materials, and readable spatial hierarchy.\n"
        "- Include surrounding terrain, access roads, and scale cues where appropriate.\n"
        "- Avoid people-heavy staging, exaggerated concept-art effects, or fantasy aesthetics."
    )

    sections = [
        opening,
        f"Project summary:\n{project_summary}",
        f"Technical details:\n{bullet_text}",
        f"Domain guidance:\n{guidance}",
        style_block,
    ]
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)

View File

@@ -16,6 +16,7 @@ from civilplan_mcp import __version__
from civilplan_mcp.config import check_api_keys, get_settings
from civilplan_mcp.tools.benchmark_validator import validate_against_benchmark
from civilplan_mcp.tools.bid_type_selector import select_bid_type
from civilplan_mcp.tools.birdseye_generator import generate_birdseye_view
from civilplan_mcp.tools.boq_generator import generate_boq_excel
from civilplan_mcp.tools.budget_report_generator import generate_budget_report
from civilplan_mcp.tools.doc_generator import generate_investment_doc
@@ -113,6 +114,7 @@ def build_mcp() -> FastMCP:
_register_read_tool(app, "civilplan_validate_against_benchmark", validate_against_benchmark)
_register_write_tool(app, "civilplan_generate_budget_report", generate_budget_report)
_register_write_tool(app, "civilplan_generate_dxf_drawing", generate_dxf_drawing)
_register_write_tool(app, "civilplan_generate_birdseye_view", generate_birdseye_view)
return app

View File

@@ -0,0 +1,3 @@
from civilplan_mcp.services.gemini_image import GeminiImageService
__all__ = ["GeminiImageService"]

View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
from PIL import Image as PILImage
try:
from google import genai
from google.genai import types as genai_types
except ImportError: # pragma: no cover - exercised in tests via runtime guard
genai = None
genai_types = None
logger = logging.getLogger(__name__)
class GeminiImageService:
    """Request an image from a Gemini model and save it to disk.

    The service never raises from :meth:`generate_image`; failures are
    logged and reported through the returned status dictionary.
    """

    def __init__(
        self,
        *,
        api_key: str,
        model: str = "gemini-3-pro-image-preview",
        client: Any | None = None,
    ) -> None:
        """Store the settings and obtain a client.

        Args:
            api_key: Key handed to ``genai.Client`` when no client is injected.
            model: Model name sent with every generation request.
            client: Pre-built client object. When given, no google-genai
                client is constructed.

        Raises:
            RuntimeError: If no client is injected and google-genai is not
                installed.
        """
        self.api_key = api_key
        self.model = model
        # Identity check rather than truthiness: an injected client that
        # happens to evaluate as falsy must still be used as-is.
        self._client = client if client is not None else self._build_client()

    def _build_client(self) -> Any:
        """Create a google-genai client from the stored API key."""
        if genai is None:
            raise RuntimeError("google-genai is not installed. Install it to use GeminiImageService.")
        return genai.Client(api_key=self.api_key)

    def _build_config(self, *, aspect_ratio: str, image_size: str) -> Any:
        """Build the generation config asking for text and image output.

        Uses the typed ``ImageConfig`` / ``GenerateContentConfig`` classes
        when google-genai exposes them and plain dictionaries otherwise.
        """
        modalities = ["TEXT", "IMAGE"]
        image_config: Any = {
            "aspect_ratio": aspect_ratio,
            "image_size": image_size,
        }
        if genai_types is None:
            return {"response_modalities": modalities, "image_config": image_config}

        image_config_factory = getattr(genai_types, "ImageConfig", None)
        generate_config_factory = getattr(genai_types, "GenerateContentConfig", None)
        if callable(image_config_factory):
            image_config = image_config_factory(aspect_ratio=aspect_ratio, image_size=image_size)
        if callable(generate_config_factory):
            return generate_config_factory(
                response_modalities=modalities,
                image_config=image_config,
            )
        return {"response_modalities": modalities, "image_config": image_config}

    @staticmethod
    def _extract_parts(response: Any) -> list[Any]:
        """Return the content parts of *response* as a list.

        Prefers ``response.parts``; otherwise returns the parts of the first
        candidate that has any. Returns an empty list when nothing is found.
        """
        direct_parts = getattr(response, "parts", None)
        if direct_parts:
            return list(direct_parts)

        candidates = getattr(response, "candidates", None) or []
        for candidate in candidates:
            candidate_parts = getattr(getattr(candidate, "content", None), "parts", None)
            if candidate_parts:
                return list(candidate_parts)
        return []

    def generate_image(
        self,
        *,
        prompt: str,
        output_path: str,
        reference_image_path: str | None = None,
        aspect_ratio: str = "16:9",
        image_size: str = "2K",
    ) -> dict[str, str]:
        """Generate one image and write it to *output_path*.

        Args:
            prompt: Text prompt sent to the model.
            output_path: Destination file; parent directories are created.
            reference_image_path: Optional image file sent along with the
                prompt.
            aspect_ratio: Forwarded to the image config.
            image_size: Forwarded to the image config.

        Returns:
            ``{"status": "success", "path": ...}`` when an image was saved,
            otherwise ``{"status": "error", "error": ...}``. Any exception is
            logged and converted into the error form.
        """
        reference_image: PILImage.Image | None = None
        try:
            contents: list[Any] = [prompt]
            if reference_image_path:
                reference_image = PILImage.open(reference_image_path)
                contents.append(reference_image)

            response = self._client.models.generate_content(
                model=self.model,
                contents=contents,
                config=self._build_config(aspect_ratio=aspect_ratio, image_size=image_size),
            )

            # Extract once; the same list serves the image and text passes.
            parts = self._extract_parts(response)
            for part in parts:
                if getattr(part, "inline_data", None) is None or not hasattr(part, "as_image"):
                    continue
                image = part.as_image()
                if image is None:
                    # Inline data that is not an image; keep looking instead
                    # of failing on ``None.save``.
                    continue
                output = Path(output_path)
                output.parent.mkdir(parents=True, exist_ok=True)
                image.save(str(output))
                return {"status": "success", "path": str(output)}

            # No image came back: surface any text the model returned instead.
            text_parts = [str(part.text).strip() for part in parts if getattr(part, "text", None)]
            message = "No image in API response."
            if text_parts:
                message = f"{message} {' '.join(text_parts)}"
            return {"status": "error", "error": message}
        except Exception as exc:
            logger.exception("Gemini image generation failed.")
            return {"status": "error", "error": str(exc)}
        finally:
            if reference_image is not None:
                reference_image.close()

View File

@@ -42,11 +42,13 @@ def main(argv: list[str] | None = None) -> int:
data_go_kr_api_key = _prompt_value("DATA_GO_KR_API_KEY", imported.get("DATA_GO_KR_API_KEY", ""))
vworld_api_key = _prompt_value("VWORLD_API_KEY", imported.get("VWORLD_API_KEY", ""))
gemini_api_key = _prompt_value("GEMINI_API_KEY", imported.get("GEMINI_API_KEY", ""))
target = save_api_keys(
{
"DATA_GO_KR_API_KEY": data_go_kr_api_key,
"VWORLD_API_KEY": vworld_api_key,
"GEMINI_API_KEY": gemini_api_key,
}
)

View File

@@ -0,0 +1,129 @@
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any
from civilplan_mcp.config import get_settings
from civilplan_mcp.models import ProjectDomain
from civilplan_mcp.prompts.birdseye_templates import build_prompt
from civilplan_mcp.services.gemini_image import GeminiImageService
from civilplan_mcp.tools._base import wrap_response
logger = logging.getLogger(__name__)
# Maps a project-domain value string to the short project-type key used by
# the prompt templates. Domains missing from this table are treated as
# "mixed" by _domain_to_project_type().
DOMAIN_TO_PROJECT_TYPE = {
    "토목_도로": "road",  # civil works: road
    "건축": "building",  # architecture / building
    "토목_상하수도": "water",  # civil works: water supply and sewerage
    "토목_하천": "river",  # civil works: river
    "조경": "landscape",  # landscaping
    "복합": "mixed",  # combined / multi-domain
}
def _domain_to_project_type(domain: str) -> str:
    """Translate a domain value string into a prompt project-type key.

    Returns ``"mixed"`` for any domain not listed in
    ``DOMAIN_TO_PROJECT_TYPE``.
    """
    try:
        return DOMAIN_TO_PROJECT_TYPE[domain]
    except KeyError:
        return "mixed"
def _resolve_domain(domain: str | None) -> ProjectDomain:
    """Convert a raw domain string into a ``ProjectDomain`` member.

    A missing or empty value, or one that is not a valid ``ProjectDomain``
    value, resolves to ``ProjectDomain.복합``.
    """
    fallback = ProjectDomain.복합
    raw_value = domain if domain else fallback.value
    try:
        resolved = ProjectDomain(raw_value)
    except ValueError:
        resolved = fallback
    return resolved
def svg_to_png(svg_content: str, output_path: str) -> str:
    """Render SVG markup to a PNG file and return the path that was written.

    Args:
        svg_content: SVG document as text; it is encoded as UTF-8 before
            being handed to cairosvg.
        output_path: Destination file for the PNG.

    Returns:
        ``output_path`` unchanged.
    """
    # cairosvg is imported at call time, not at module import time.
    import cairosvg

    svg_bytes = svg_content.encode("utf-8")
    cairosvg.svg2png(bytestring=svg_bytes, write_to=output_path)
    return output_path
def _safe_file_stem(raw_id: Any, default: str = "birdseye-render") -> str:
    """Reduce a caller-supplied id to text that is safe as a file-name stem."""
    # Keep letters/digits (any script) and a few harmless punctuation marks;
    # everything else, including path separators, becomes "_".
    cleaned = "".join(ch if ch.isalnum() or ch in "-_. " else "_" for ch in str(raw_id))
    # Leading/trailing dots and spaces would produce hidden or awkward names.
    cleaned = cleaned.strip(". ")
    return cleaned or default


def generate_birdseye_view(
    *,
    project_summary: str,
    project_spec: dict[str, Any],
    svg_drawing: str | None = None,
    resolution: str = "2K",
) -> dict[str, Any]:
    """Render a bird's-eye and a perspective image for a project.

    Args:
        project_summary: Free-text project description used in both prompts.
        project_spec: Project dictionary. Keys read here are ``domain``,
            ``project_id``, ``output_dir``, ``road`` (a dict whose non-None
            entries become prompt details), and ``terrain``, ``region``,
            ``utilities``, ``year_start``, ``year_end``.
        svg_drawing: Optional SVG markup. When given it is converted to a PNG
            and sent to the model as a reference image; a failed conversion
            is logged and rendering continues without a reference.
        resolution: Image size label passed to the image service. It is
            upper-cased first because the Gemini API only accepts sizes
            written with an uppercase "K".

    Returns:
        The result of ``wrap_response`` for a payload whose ``status`` is
        ``"success"`` (both views rendered), ``"partial"`` (one view
        rendered) or ``"error"`` (none rendered, or no Gemini API key is
        configured).
    """
    settings = get_settings()
    domain = _resolve_domain(project_spec.get("domain"))
    # `or` instead of a .get() default so an explicit None/"" also falls back.
    project_id = project_spec.get("project_id") or "birdseye-render"

    if not settings.gemini_api_key:
        return wrap_response(
            {
                "status": "error",
                "project_id": project_id,
                "error": "GEMINI_API_KEY is not configured. Add it to .env or store it with python setup_keys.py.",
            },
            domain,
        )

    output_dir = Path(project_spec.get("output_dir") or settings.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # The id comes from the tool caller; never let it inject path separators
    # or ".." into the output file names.
    file_stem = _safe_file_stem(project_id)
    image_size = str(resolution or "2K").strip().upper()

    details: dict[str, Any] = {}
    road_spec = project_spec.get("road")
    if isinstance(road_spec, dict):
        details.update({key: value for key, value in road_spec.items() if value is not None})
    for key in ("terrain", "region", "utilities", "year_start", "year_end"):
        value = project_spec.get(key)
        if value not in (None, "", [], {}):
            details[key] = value

    reference_image_path: str | None = None
    if svg_drawing:
        try:
            reference_image_path = svg_to_png(svg_drawing, str(output_dir / f"{file_stem}_reference.png"))
        except Exception as exc:
            # Best effort: the reference image is optional.
            logger.warning("Failed to convert SVG reference for birdseye render: %s", exc)

    service = GeminiImageService(api_key=settings.gemini_api_key)
    project_type = _domain_to_project_type(domain.value)

    # Both views share every input except the view type and the output name.
    renders: dict[str, dict[str, str]] = {}
    for view_type in ("birdseye", "perspective"):
        renders[view_type] = service.generate_image(
            prompt=build_prompt(
                view_type=view_type,
                project_type=project_type,
                project_summary=project_summary,
                details=details,
            ),
            output_path=str(output_dir / f"{file_stem}_{view_type}.png"),
            reference_image_path=reference_image_path,
            aspect_ratio="16:9",
            image_size=image_size,
        )

    succeeded = sum(1 for result in renders.values() if result["status"] == "success")
    if succeeded == len(renders):
        status = "success"
    elif succeeded:
        status = "partial"
    else:
        status = "error"

    return wrap_response(
        {
            "status": status,
            "project_id": project_id,
            "model": service.model,
            "resolution": image_size,
            "reference_image_path": reference_image_path,
            "birdseye_view": renders["birdseye"],
            "perspective_view": renders["perspective"],
        },
        domain,
    )