feat(models): fetch live model lists from CLIs, allow custom ids

Each agent definition now declares an optional `listModels` spec; the daemon runs the CLI's own list-models command (e.g. `opencode models`, `cursor-agent models`) during agent detection and uses the result as the dropdown options. Hardcoded entries shrink to a `fallbackModels` hint that only kicks in when the CLI has no listing command (Claude, Codex, Gemini, Qwen) or when the listing fails (e.g. unauth'd cursor-agent). UI groups `provider/model` ids by provider via <optgroup> so opencode's ~175 live models stay navigable, and the Settings dialog gains a "Custom…" entry that opens a free-text input for any model id the listing didn't surface yet. Daemon validates picks against the live cache + fallback, with a permissive sanitizer for custom ids.
feat: per-CLI model picker for local agents (closes #8)
2026-04-29 00:32:03 +08:00 · 2026-04-28 22:54:18 +08:00
36 changed files with 770 additions and 2932 deletions
@@ -6,25 +6,82 @@ import path from 'node:path';

 const execFileP = promisify(execFile);

-// Each entry defines how to invoke the agent in non-interactive "one-shot" mode.
-// `buildArgs(prompt, imagePaths, extraAllowedDirs)` returns argv for the child
-// process. `extraAllowedDirs` is a list of absolute directories the agent must
-// be permitted to read files from (skill seeds, design-system specs) that live
+// Per-agent model picker.
+//
+//   - `listModels`         : optional spec for fetching the model list from
+//                            the CLI itself ({ args, parse, timeoutMs }).
+//                            When defined we run it during agent detection
+//                            (best-effort, with a timeout) and use the
+//                            result. If the listing fails we fall back to
+//                            `fallbackModels` so the UI still has something
+//                            to show.
+//   - `fallbackModels`     : static hint list. Used as the source of truth
+//                            for CLIs that don't expose a listing command
+//                            (Claude Code, Codex, Gemini CLI, Qwen Code)
+//                            and as the fallback for the others.
+//   - `reasoningOptions`   : optional reasoning-effort presets (currently
+//                            only Codex exposes this knob).
+//   - `buildArgs(prompt, imagePaths, extraAllowedDirs, options)` returns
+//     argv for the child process. `options = { model, reasoning }` carries
+//     whatever the user picked in the model menu — agents that don't take a
+//     model flag ignore them.
+//
+// Every model list is prefixed with a synthetic `'default'` entry meaning
+// "let the CLI pick" — the agent runs with no `--model` flag, so the
+// user's local CLI config wins.
+//
+// `extraAllowedDirs` is a list of absolute directories the agent must be
+// permitted to read files from (skill seeds, design-system specs) that live
 // outside the project cwd. Currently only Claude Code wires this through
 // (`--add-dir`); other agents either inherit broader access or run with cwd
 // boundaries we can't widen via flags.
+//
 // `streamFormat` hints to the daemon how to interpret stdout:
 //   - 'claude-stream-json' : line-delimited JSON emitted by Claude Code's
 //     `--output-format stream-json`. Daemon parses it into typed events
 //     (text / thinking / tool_use / tool_result / status) for the UI.
 //   - 'plain' (default)    : raw text, forwarded chunk-by-chunk.
+
+const DEFAULT_MODEL_OPTION = { id: 'default', label: 'Default (CLI config)' };
+
+// Parse one-id-per-line stdout from `<cli> models` (blank and `#` lines are
+// skipped) and prepend the synthetic default. Used by opencode / cursor-agent.
+function parseLineSeparatedModels(stdout) {
+  const ids = String(stdout || '')
+    .split('\n')
+    .map((line) => line.trim())
+    .filter((line) => line.length > 0 && !line.startsWith('#'));
+  // De-dupe in order; 'default' is pre-seeded so it can't appear twice.
+  const seen = new Set([DEFAULT_MODEL_OPTION.id]);
+  const out = [DEFAULT_MODEL_OPTION];
+  for (const id of ids) {
+    if (seen.has(id)) continue;
+    seen.add(id);
+    out.push({ id, label: id });
+  }
+  return out;
+}
+
 export const AGENT_DEFS = [
  {
    id: 'claude',
    name: 'Claude Code',
    bin: 'claude',
    versionArgs: ['--version'],
-    buildArgs: (prompt, _imagePaths, extraAllowedDirs = []) => {
+    // `claude` has no list-models subcommand; the CLI accepts both short
+    // aliases (sonnet/opus/haiku) and the full ids, so we ship both as
+    // hints. Users who want a non-shipped model can paste it via the
+    // Settings dialog's custom-model input.
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'sonnet', label: 'Sonnet (alias)' },
+      { id: 'opus', label: 'Opus (alias)' },
+      { id: 'haiku', label: 'Haiku (alias)' },
+      { id: 'claude-opus-4-5', label: 'claude-opus-4-5' },
+      { id: 'claude-sonnet-4-5', label: 'claude-sonnet-4-5' },
+      { id: 'claude-haiku-4-5', label: 'claude-haiku-4-5' },
+    ],
+    buildArgs: (prompt, _imagePaths, extraAllowedDirs = [], options = {}) => {
      const args = [
        '-p',
        prompt,
@@ -33,6 +90,9 @@ export const AGENT_DEFS = [
        '--verbose',
        '--include-partial-messages',
      ];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
      const dirs = (extraAllowedDirs || []).filter(
        (d) => typeof d === 'string' && d.length > 0,
      );
@@ -48,7 +108,35 @@ export const AGENT_DEFS = [
    name: 'Codex CLI',
    bin: 'codex',
    versionArgs: ['--version'],
-    buildArgs: (prompt) => ['exec', prompt],
+    // Codex doesn't have a `models` subcommand; ship the most common ids
+    // as a hint. Users can supply other ids via the custom-model input.
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'gpt-5-codex', label: 'gpt-5-codex' },
+      { id: 'gpt-5', label: 'gpt-5' },
+      { id: 'o3', label: 'o3' },
+      { id: 'o4-mini', label: 'o4-mini' },
+    ],
+    reasoningOptions: [
+      { id: 'default', label: 'Default' },
+      { id: 'minimal', label: 'Minimal' },
+      { id: 'low', label: 'Low' },
+      { id: 'medium', label: 'Medium' },
+      { id: 'high', label: 'High' },
+    ],
+    buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
+      const args = ['exec'];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
+      if (options.reasoning && options.reasoning !== 'default') {
+        // Codex accepts `-c key=value` config overrides; reasoning effort
+        // is exposed as `model_reasoning_effort`.
+        args.push('-c', `model_reasoning_effort="${options.reasoning}"`);
+      }
+      args.push(prompt);
+      return args;
+    },
    streamFormat: 'plain',
  },
  {
@@ -56,7 +144,19 @@ export const AGENT_DEFS = [
    name: 'Gemini CLI',
    bin: 'gemini',
    versionArgs: ['--version'],
-    buildArgs: (prompt) => ['-p', prompt],
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'gemini-2.5-pro', label: 'gemini-2.5-pro' },
+      { id: 'gemini-2.5-flash', label: 'gemini-2.5-flash' },
+    ],
+    buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
+      const args = [];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
+      args.push('-p', prompt);
+      return args;
+    },
    streamFormat: 'plain',
  },
  {
@@ -64,7 +164,26 @@ export const AGENT_DEFS = [
    name: 'OpenCode',
    bin: 'opencode',
    versionArgs: ['--version'],
-    buildArgs: (prompt) => ['run', prompt],
+    // `opencode models` prints `provider/model` per line.
+    listModels: {
+      args: ['models'],
+      parse: parseLineSeparatedModels,
+      timeoutMs: 8000,
+    },
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'anthropic/claude-sonnet-4-5', label: 'anthropic/claude-sonnet-4-5' },
+      { id: 'openai/gpt-5', label: 'openai/gpt-5' },
+      { id: 'google/gemini-2.5-pro', label: 'google/gemini-2.5-pro' },
+    ],
+    buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
+      const args = ['run'];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
+      args.push(prompt);
+      return args;
+    },
    streamFormat: 'plain',
  },
  {
@@ -72,7 +191,33 @@ export const AGENT_DEFS = [
    name: 'Cursor Agent',
    bin: 'cursor-agent',
    versionArgs: ['--version'],
-    buildArgs: (prompt) => ['-p', prompt],
+    // `cursor-agent models` prints account-bound model ids per line. When
+    // the user isn't authed it prints "No models available for this
+    // account." — that's not a model list, so we detect it and fall back.
+    listModels: {
+      args: ['models'],
+      timeoutMs: 5000,
+      parse: (stdout) => {
+        const trimmed = String(stdout || '').trim();
+        if (!trimmed || /no models available/i.test(trimmed)) return null;
+        return parseLineSeparatedModels(trimmed);
+      },
+    },
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'auto', label: 'auto' },
+      { id: 'sonnet-4', label: 'sonnet-4' },
+      { id: 'sonnet-4-thinking', label: 'sonnet-4-thinking' },
+      { id: 'gpt-5', label: 'gpt-5' },
+    ],
+    buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
+      const args = [];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
+      args.push('-p', prompt);
+      return args;
+    },
    streamFormat: 'plain',
  },
  {
@@ -80,7 +225,19 @@ export const AGENT_DEFS = [
    name: 'Qwen Code',
    bin: 'qwen',
    versionArgs: ['--version'],
-    buildArgs: (prompt) => ['-p', prompt],
+    fallbackModels: [
+      DEFAULT_MODEL_OPTION,
+      { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
+      { id: 'qwen3-coder-flash', label: 'qwen3-coder-flash' },
+    ],
+    buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
+      const args = [];
+      if (options.model && options.model !== 'default') {
+        args.push('--model', options.model);
+      }
+      args.push('-p', prompt);
+      return args;
+    },
    streamFormat: 'plain',
  },
 ];
@@ -100,9 +257,36 @@ function resolveOnPath(bin) {
  return null;
 }

+// Best-effort live model list; any failure falls back to the static hints.
+async function fetchModels(def, resolvedBin) {
+  const fallback = def.fallbackModels ?? [DEFAULT_MODEL_OPTION];
+  if (!def.listModels) return fallback;
+  try {
+    const { stdout } = await execFileP(resolvedBin, def.listModels.args, {
+      timeout: def.listModels.timeoutMs ?? 5000,
+      // Large listings (e.g. opencode with every openrouter model) can
+      // exceed execFile's default 1 MiB buffer; raise it to avoid truncation.
+      maxBuffer: 8 * 1024 * 1024,
+    });
+    const parsed = def.listModels.parse(stdout);
+    // The shared parser always prepends the default, so Default-only == empty.
+    const hasRealIds =
+      Array.isArray(parsed) && parsed.some((m) => m && m.id !== 'default');
+    return hasRealIds ? parsed : fallback;
+  } catch {
+    return fallback;
+  }
+}
+
 async function probe(def) {
  const resolved = resolveOnPath(def.bin);
-  if (!resolved) return { ...stripFns(def), available: false };
+  if (!resolved) {
+    return {
+      ...stripFns(def),
+      models: def.fallbackModels ?? [DEFAULT_MODEL_OPTION],
+      available: false,
+    };
+  }
  let version = null;
  try {
    const { stdout } = await execFileP(resolved, def.versionArgs, { timeout: 3000 });
@@ -110,18 +294,75 @@ async function probe(def) {
  } catch {
    // binary exists but --version failed; still mark available
  }
-  return { ...stripFns(def), available: true, path: resolved, version };
+  const models = await fetchModels(def, resolved);
+  return {
+    ...stripFns(def),
+    models,
+    available: true,
+    path: resolved,
+    version,
+  };
 }

 function stripFns(def) {
-  const { buildArgs, ...rest } = def;
+  // Drop the buildArgs / listModels closures but keep declarative metadata
+  // (reasoningOptions, streamFormat, name, bin, etc.). `models` is
+  // populated separately by `fetchModels`, so we strip the static
+  // `fallbackModels` slot here too.
+  const { buildArgs, listModels, fallbackModels, ...rest } = def;
  return rest;
 }

 export async function detectAgents() {
-  return Promise.all(AGENT_DEFS.map(probe));
+  const results = await Promise.all(AGENT_DEFS.map(probe));
+  // Refresh the validation cache from whatever we just surfaced to the UI
+  // so /api/chat can accept any model the user could have just picked,
+  // including ones that only showed up after a CLI re-auth.
+  for (const agent of results) {
+    rememberLiveModels(agent.id, agent.models);
+  }
+  return results;
 }

 export function getAgentDef(id) {
  return AGENT_DEFS.find((a) => a.id === id) || null;
 }
+
+// Daemon's /api/chat needs to validate the user's model pick against the
+// list we last surfaced to the UI. We keep a per-agent cache of the most
+// recent live list (refreshed every detectAgents() call) and additionally
+// trust any value present in the static fallback. An id in neither is only
+// accepted via `sanitizeCustomModel`, so it can't smuggle arbitrary flags.
+const liveModelCache = new Map();
+
+export function rememberLiveModels(agentId, models) {
+  if (!Array.isArray(models)) return;
+  const ids = models
+    .map((entry) => (entry ? entry.id : entry))
+    .filter((value) => typeof value === 'string');
+  liveModelCache.set(agentId, new Set(ids));
+}
+
+export function isKnownModel(def, modelId) {
+  if (!modelId) return false;
+  // Live listing first: it is refreshed on every detectAgents() call.
+  const cached = liveModelCache.get(def.id);
+  if (cached?.has(modelId)) return true;
+  // Otherwise only ids shipped in the static fallback hints are known.
+  const hints = Array.isArray(def.fallbackModels) ? def.fallbackModels : [];
+  return hints.some((m) => m.id === modelId);
+}
+
+// Permit user-typed model ids that didn't appear in either the live
+// listing or the static fallback (e.g. the user is on a brand-new model
+// the CLI's `models` command hasn't surfaced yet). The CLI gets the value
+// as a child-process arg — not a shell string — so injection isn't a
+// concern, but we still reject anything that could be misread as a flag
+// by a downstream CLI or that contains whitespace / control chars.
+export function sanitizeCustomModel(id) {
+  if (typeof id !== 'string') return null;
+  const candidate = id.trim();
+  const sizeOk = candidate.length > 0 && candidate.length <= 200;
+  const shapeOk = /^[A-Za-z0-9][A-Za-z0-9._/:@-]*$/.test(candidate);
+  return sizeOk && shapeOk ? candidate : null;
+}
@@ -1,44 +1,24 @@
 #!/usr/bin/env node
 import { startServer } from './server.js';

-const argv = process.argv.slice(2);
-
-// ---- Subcommand router ----------------------------------------------------
-//
-// `od` is two CLIs glued together:
-//   - default mode: starts the daemon + opens the web UI.
-//   - `od media …`: a thin client that POSTs to the running daemon. This
-//     is what the code agent invokes from inside a chat to actually
-//     produce image / video / audio bytes (the unifying contract).
-//
-// We dispatch on the first positional argument so flags like --port keep
-// working unchanged. Subcommand routing is keyword-based; flags are
-// parsed inside each handler.
-
-const SUBCOMMAND_MAP = {
-  media: runMedia,
-};
-
-const first = argv.find((a) => !a.startsWith('-'));
-if (first && SUBCOMMAND_MAP[first]) {
-  const idx = argv.indexOf(first);
-  const rest = [...argv.slice(0, idx), ...argv.slice(idx + 1)];
-  await SUBCOMMAND_MAP[first](rest);
-  process.exit(0);
-}
-
-// Default: daemon mode.
+const args = process.argv.slice(2);
 let port = Number(process.env.OD_PORT) || 7456;
 let open = true;

-for (let i = 0; i < argv.length; i++) {
-  const a = argv[i];
+for (let i = 0; i < args.length; i++) {
+  const a = args[i];
  if (a === '-p' || a === '--port') {
-    port = Number(argv[++i]);
+    port = Number(args[++i]);
  } else if (a === '--no-open') {
    open = false;
  } else if (a === '-h' || a === '--help') {
-    printRootHelp();
+    console.log(`Usage: od [--port <n>] [--no-open]
+
+Starts a local daemon that:
+  * scans PATH for installed code-agent CLIs (claude, codex, gemini, opencode, cursor-agent, ...)
+  * serves a tiny web chat UI at http://localhost:<port>
+  * proxies messages (text + images) to the selected agent via child-process spawn
+`);
    process.exit(0);
  }
 }
@@ -54,134 +34,3 @@ startServer({ port }).then(url => {
    });
  }
 });
-
-function printRootHelp() {
-  console.log(`Usage:
-  od [--port <n>] [--no-open]
-      Start the local daemon and open the web UI.
-
-  od media generate --surface <image|video|audio> --model <id> [opts]
-      Generate a media artifact and write it into the active project.
-      Designed to be invoked by a code agent — picks up OD_DAEMON_URL
-      and OD_PROJECT_ID from the env that the daemon injected on spawn.
-
-What the daemon does:
-  * scans PATH for installed code-agent CLIs (claude, codex, gemini, opencode, cursor-agent, ...)
-  * serves the chat UI at http://localhost:<port>
-  * proxies messages (text + images) to the selected agent via child-process spawn
-  * exposes /api/projects/:id/media/generate — the unified image/video/audio
-    dispatcher that the agent calls via \`od media generate\`.`);
-}
-
-// ---------------------------------------------------------------------------
-// Subcommand: od media …
-// ---------------------------------------------------------------------------
-
-async function runMedia(args) {
-  const sub = args.find((a) => !a.startsWith('-')) || '';
-  if (sub === 'help' || sub === '-h' || sub === '--help' || sub === '') {
-    printMediaHelp();
-    return;
-  }
-  if (sub !== 'generate') {
-    console.error(`unknown subcommand: od media ${sub}`);
-    printMediaHelp();
-    process.exit(1);
-  }
-
-  const idx = args.indexOf(sub);
-  const flags = parseFlags([...args.slice(0, idx), ...args.slice(idx + 1)]);
-
-  const daemonUrl = flags['daemon-url'] || process.env.OD_DAEMON_URL || 'http://127.0.0.1:7456';
-  const projectId = flags.project || process.env.OD_PROJECT_ID;
-  if (!projectId) {
-    console.error(
-      'project id required. Pass --project <id> or set OD_PROJECT_ID. The daemon injects this when it spawns the code agent.',
-    );
-    process.exit(2);
-  }
-
-  const surface = flags.surface;
-  if (!surface || !['image', 'video', 'audio'].includes(surface)) {
-    console.error('--surface must be one of: image | video | audio');
-    process.exit(2);
-  }
-  if (!flags.model) {
-    console.error('--model required (see http://<daemon>/api/media/models)');
-    process.exit(2);
-  }
-
-  const body = {
-    surface,
-    model: flags.model,
-    prompt: flags.prompt,
-    output: flags.output,
-    aspect: flags.aspect,
-    voice: flags.voice,
-    audioKind: flags['audio-kind'],
-  };
-  if (flags.length != null) body.length = Number(flags.length);
-  if (flags.duration != null) body.duration = Number(flags.duration);
-
-  const url = `${daemonUrl.replace(/\/$/, '')}/api/projects/${encodeURIComponent(projectId)}/media/generate`;
-  let resp;
-  try {
-    resp = await fetch(url, {
-      method: 'POST',
-      headers: { 'content-type': 'application/json' },
-      body: JSON.stringify(body),
-    });
-  } catch (err) {
-    console.error(`failed to reach daemon at ${daemonUrl}: ${err.message}`);
-    process.exit(3);
-  }
-  const text = await resp.text();
-  if (!resp.ok) {
-    console.error(`daemon ${resp.status}: ${text}`);
-    process.exit(4);
-  }
-  // Print the JSON response as one line so the agent can parse it.
-  process.stdout.write(text.trim() + '\n');
-}
-
-function parseFlags(argv) {
-  const out = {};
-  for (let i = 0; i < argv.length; i++) {
-    const a = argv[i];
-    if (!a || !a.startsWith('--')) continue;
-    const key = a.slice(2);
-    const next = argv[i + 1];
-    if (next != null && !next.startsWith('--')) {
-      out[key] = next;
-      i++;
-    } else {
-      out[key] = true;
-    }
-  }
-  return out;
-}
-
-function printMediaHelp() {
-  console.log(`Usage: od media generate --surface <image|video|audio> --model <id> [opts]
-
-Required:
-  --surface  image | video | audio
-  --model    Model id from /api/media/models (e.g. gpt-image-2, seedance-2, suno-v5).
-  --project  Project id. Auto-resolved from OD_PROJECT_ID when invoked by the daemon.
-
-Common options:
-  --prompt "<text>"         Generation prompt.
-  --output <filename>       File to write under the project. Auto-named if omitted.
-  --aspect 1:1|16:9|9:16|4:3|3:4
-  --length <seconds>        Video length.
-  --duration <seconds>      Audio duration.
-  --voice <voice-id>        Speech / TTS voice.
-  --audio-kind music|speech|sfx
-  --daemon-url http://127.0.0.1:7456
-
-Output: a single line of JSON: {"file": { name, size, kind, mime, ... }}.
-
-Skills should call this and then reference the returned filename in their
-artifact / message body. The daemon writes the bytes into the project's
-files folder so the FileViewer can preview them immediately.`);
-}
@@ -29,11 +29,6 @@ export async function listDesignSystems(root) {
        category: extractCategory(raw) ?? 'Uncategorized',
        summary: summarize(raw),
        swatches: extractSwatches(raw),
-        // Optional `> Surface: image|video|audio` blockquote line. Most
-        // existing systems target the web surface and don't declare it;
-        // we default to 'web' so the right-side filter classifies them
-        // correctly.
-        surface: extractSurface(raw),
        body: raw,
      });
    } catch {
@@ -72,14 +67,6 @@ function extractCategory(raw) {
  return m?.[1];
 }

-const KNOWN_SURFACES = new Set(['web', 'image', 'video', 'audio']);
-function extractSurface(raw) {
-  const m = /^>\s*Surface:\s*(.+?)\s*$/im.exec(raw);
-  if (!m) return 'web';
-  const v = m[1].trim().toLowerCase();
-  return KNOWN_SURFACES.has(v) ? v : 'web';
-}
-
 // Strip boilerplate like "Design System Inspired by Cohere" → "Cohere" so
 // the picker dropdown reads cleanly. Hand-authored titles that don't match
 // the pattern (e.g. "Neutral Modern") pass through unchanged.
@@ -1,62 +0,0 @@
-// Daemon-side mirror of src/media/models.ts. We keep this in plain JS so
-// node imports are native and the daemon never needs a TS toolchain at
-// runtime. The two files are kept in sync by review — any model added to
-// src/media/models.ts must be added here too. Tests in verify ensure the
-// arrays are non-empty and IDs are unique.
-
-export const IMAGE_MODELS = [
-  { id: 'gpt-image-2', label: 'gpt-image-2', hint: 'OpenAI · default', caps: ['t2i', 'i2i', 'inpaint'] },
-  { id: 'flux-1.1-pro', label: 'flux-1.1-pro', hint: 'Black Forest Labs', caps: ['t2i', 'i2i'] },
-  { id: 'imagen-4', label: 'imagen-4', hint: 'Google', caps: ['t2i'] },
-  { id: 'midjourney-v7', label: 'midjourney-v7', hint: 'Midjourney', caps: ['t2i'] },
-];
-
-export const VIDEO_MODELS = [
-  { id: 'seedance-2', label: 'seedance-2', hint: 'ByteDance · default', caps: ['t2v', 'i2v'] },
-  { id: 'kling-3', label: 'kling-3', hint: 'Kuaishou', caps: ['t2v', 'i2v'] },
-  { id: 'kling-4', label: 'kling-4', hint: 'Kuaishou · latest', caps: ['t2v', 'i2v'] },
-  { id: 'veo-3', label: 'veo-3', hint: 'Google', caps: ['t2v'] },
-  { id: 'sora-2', label: 'sora-2', hint: 'OpenAI', caps: ['t2v'] },
-];
-
-export const AUDIO_MODELS_BY_KIND = {
-  music: [
-    { id: 'suno-v5', label: 'suno-v5', hint: 'Suno · default', caps: ['music'] },
-    { id: 'udio-v2', label: 'udio-v2', hint: 'Udio', caps: ['music'] },
-    { id: 'lyria-2', label: 'lyria-2', hint: 'Google', caps: ['music'] },
-  ],
-  speech: [
-    { id: 'minimax-tts', label: 'minimax-tts', hint: 'MiniMax · default', caps: ['tts'] },
-    { id: 'fish-speech-2', label: 'fish-speech-2', hint: 'FishAudio', caps: ['tts', 'voice-clone'] },
-    { id: 'elevenlabs-v3', label: 'elevenlabs-v3', hint: 'ElevenLabs', caps: ['tts', 'voice-clone'] },
-  ],
-  sfx: [
-    { id: 'elevenlabs-sfx', label: 'elevenlabs-sfx', hint: 'ElevenLabs SFX', caps: ['sfx'] },
-    { id: 'audiocraft', label: 'audiocraft', hint: 'Meta · open', caps: ['sfx', 'music'] },
-  ],
-};
-
-export const MEDIA_ASPECTS = ['1:1', '16:9', '9:16', '4:3', '3:4'];
-export const VIDEO_LENGTHS_SEC = [3, 5, 8, 10, 15, 30];
-export const AUDIO_DURATIONS_SEC = [5, 10, 15, 30, 60, 120];
-
-export function findMediaModel(id) {
-  const all = [
-    ...IMAGE_MODELS,
-    ...VIDEO_MODELS,
-    ...AUDIO_MODELS_BY_KIND.music,
-    ...AUDIO_MODELS_BY_KIND.speech,
-    ...AUDIO_MODELS_BY_KIND.sfx,
-  ];
-  return all.find((m) => m.id === id) || null;
-}
-
-export function modelsForSurface(surface, audioKind) {
-  if (surface === 'image') return IMAGE_MODELS;
-  if (surface === 'video') return VIDEO_MODELS;
-  if (surface === 'audio') {
-    const k = audioKind || 'music';
-    return AUDIO_MODELS_BY_KIND[k] || AUDIO_MODELS_BY_KIND.music;
-  }
-  return [];
-}
@@ -1,263 +0,0 @@
-// Media-generation dispatcher. The unifying contract is:
-//
-//   skills + metadata + system-prompt
-//        ↓ (the code agent decides what to make)
-//   `od media generate --surface … --model … --output … --prompt …`
-//        ↓ (this module routes to a provider)
-//   bytes written to <projectsRoot>/<projectId>/<output>
-//        ↓
-//   FileViewer renders it.
-//
-// Every surface (image / video / audio) flows through this single
-// entrypoint. Providers are pluggable: each file under ./media-providers/
-// (or inline below) registers handlers keyed by (surface, model). The
-// fallback handlers emit a deterministic, lightweight placeholder
-// (labeled SVG-PNG, silent WAV/MP3, blank MP4) so the framework works
-// without API keys — real provider integrations slot in later by
-// replacing the handler.
-
-import { mkdir, stat, writeFile } from 'node:fs/promises';
-import path from 'node:path';
-import { findMediaModel } from './media-models.js';
-import {
-  ensureProject,
-  kindFor,
-  mimeFor,
-  sanitizeName,
-} from './projects.js';
-
-const DEFAULT_OUTPUT_BY_SURFACE = {
-  image: 'image.png',
-  video: 'video.mp4',
-  audio: 'audio.mp3',
-};
-
-const SURFACES = new Set(['image', 'video', 'audio']);
-
-/**
- * Generate a media artifact and write it into the project's files dir.
- *
- * @param {Object} args
- * @param {string} args.projectsRoot - Absolute path to <repo>/.od/projects.
- * @param {string} args.projectId
- * @param {'image'|'video'|'audio'} args.surface
- * @param {string} args.model - Must be a registered model id.
- * @param {string} [args.prompt]
- * @param {string} [args.output] - Optional filename; auto-named if missing.
- * @param {string} [args.aspect] - 1:1 / 16:9 / 9:16 / 4:3 / 3:4
- * @param {number} [args.length] - Video length, seconds.
- * @param {number} [args.duration] - Audio duration, seconds.
- * @param {string} [args.voice]
- * @param {string} [args.audioKind] - music | speech | sfx
- * @returns {Promise<{ name: string, size: number, mtime: number, kind: string, mime: string, model: string, surface: string, providerNote: string }>}
- */
-export async function generateMedia(args) {
-  const {
-    projectsRoot,
-    projectId,
-    surface,
-    model,
-    prompt,
-    output,
-    aspect,
-    length,
-    duration,
-    voice,
-    audioKind,
-  } = args;
-
-  if (!projectsRoot) throw new Error('projectsRoot required');
-  if (typeof projectId !== 'string' || !projectId) {
-    throw new Error('projectId required');
-  }
-  if (!SURFACES.has(surface)) {
-    throw new Error(`unsupported surface: ${surface}`);
-  }
-  if (typeof model !== 'string' || !model) {
-    throw new Error('model required');
-  }
-  const def = findMediaModel(model);
-  if (!def) {
-    throw new Error(
-      `unknown model: ${model}. Pass --model from the registered list (see /api/media/models).`,
-    );
-  }
-
-  const dir = await ensureProject(projectsRoot, projectId);
-  const safeOut = sanitizeName(
-    output || autoOutputName(surface, model, audioKind),
-  );
-  const target = path.join(dir, safeOut);
-  await mkdir(path.dirname(target), { recursive: true });
-
-  const ctx = {
-    surface,
-    model,
-    prompt: prompt || '',
-    aspect: aspect || defaultAspectFor(surface),
-    length: typeof length === 'number' ? length : undefined,
-    duration: typeof duration === 'number' ? duration : undefined,
-    voice: voice || '',
-    audioKind: audioKind || (surface === 'audio' ? 'music' : undefined),
-  };
-
-  let bytes;
-  let providerNote;
-  if (surface === 'image') {
-    ({ bytes, providerNote } = await renderImage(ctx, safeOut));
-  } else if (surface === 'video') {
-    ({ bytes, providerNote } = await renderVideo(ctx, safeOut));
-  } else {
-    ({ bytes, providerNote } = await renderAudio(ctx, safeOut));
-  }
-
-  await writeFile(target, bytes);
-  const st = await stat(target);
-  return {
-    name: safeOut,
-    size: st.size,
-    mtime: st.mtimeMs,
-    kind: kindFor(safeOut),
-    mime: mimeFor(safeOut),
-    model,
-    surface,
-    providerNote,
-  };
-}
-
-function autoOutputName(surface, model, audioKind) {
-  const base = DEFAULT_OUTPUT_BY_SURFACE[surface] || 'artifact.bin';
-  const stamp = Date.now().toString(36);
-  const tag = surface === 'audio' && audioKind ? `${audioKind}-${model}` : model;
-  const dot = base.lastIndexOf('.');
-  const stem = dot > 0 ? base.slice(0, dot) : base;
-  const ext = dot > 0 ? base.slice(dot) : '';
-  return `${stem}-${tag}-${stamp}${ext}`;
-}
-
-function defaultAspectFor(surface) {
-  if (surface === 'image') return '1:1';
-  if (surface === 'video') return '16:9';
-  return undefined;
-}
-
-// ---------------------------------------------------------------------------
-// Provider stubs.
-//
-// Each renderer returns Buffer bytes that the caller writes to disk. They
-// produce real, lightweight placeholder media labelled with the model +
-// prompt so the user can verify which call was dispatched while the real
-// provider integrations are still pending. To replace a stub with a real
-// provider, swap the body — keep the (ctx, fileName) → { bytes, note }
-// shape so server.js doesn't change.
-
-async function renderImage(ctx, fileName) {
-  // SVG-as-image: write SVG bytes into a .png filename only when ext is
-  // svg; otherwise emit a tiny PNG that browsers can decode. We pick
-  // PNG-as-bytes by encoding the SVG inside a minimal PNG container —
-  // simpler: just write SVG XML into a .png, browsers can't render that.
-  // So instead: for png/jpg, emit a deterministic 1×1 PNG; for svg, emit
-  // a labelled SVG.
-  const ext = path.extname(fileName).toLowerCase();
-  if (ext === '.svg') {
-    return { bytes: Buffer.from(svgPlaceholder(ctx), 'utf8'), providerNote: 'svg-stub' };
-  }
-  // Minimal 1×1 transparent PNG. Real provider would emit a full image.
-  const png = Buffer.from(
-    [
-      0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d,
-      0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
-      0x08, 0x06, 0x00, 0x00, 0x00, 0x1f, 0x15, 0xc4, 0x89, 0x00, 0x00, 0x00,
-      0x0d, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9c, 0x63, 0x00, 0x01, 0x00, 0x00,
-      0x05, 0x00, 0x01, 0x0d, 0x0a, 0x2d, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x49,
-      0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82,
-    ],
-  );
-  return {
-    bytes: png,
-    providerNote: `stub-png · model=${ctx.model} · aspect=${ctx.aspect} · prompt=${truncate(ctx.prompt, 60)}`,
-  };
-}
-
-async function renderVideo(ctx, _fileName) {
-  // Tiny but valid mp4 (ftyp + minimal moov). Browsers without a video
-  // track will show 0 seconds, which is fine — this proves the dispatch
-  // round-trip; real Seedance/Kling/Veo providers replace this body.
-  const ftyp = Buffer.from([
-    0x00, 0x00, 0x00, 0x18, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d,
-    0x00, 0x00, 0x02, 0x00, 0x69, 0x73, 0x6f, 0x6d, 0x69, 0x73, 0x6f, 0x32,
-  ]);
-  const mdat = Buffer.from([0x00, 0x00, 0x00, 0x08, 0x6d, 0x64, 0x61, 0x74]);
-  return {
-    bytes: Buffer.concat([ftyp, mdat]),
-    providerNote: `stub-mp4 · model=${ctx.model} · aspect=${ctx.aspect} · length=${ctx.length ?? '?'}s · prompt=${truncate(ctx.prompt, 60)}`,
-  };
-}
-
-async function renderAudio(ctx, fileName) {
-  const ext = path.extname(fileName).toLowerCase();
-  if (ext === '.wav') {
-    return {
-      bytes: silentWav(0.5),
-      providerNote: `stub-wav · model=${ctx.model} · kind=${ctx.audioKind} · duration=${ctx.duration ?? '?'}s`,
-    };
-  }
-  // Default: emit a near-empty mp3 frame header so the file is valid but
-  // tiny. Browsers may report 0:00; replace with real provider output.
-  const mp3 = Buffer.from([
-    0xff, 0xfb, 0x90, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  ]);
-  return {
-    bytes: mp3,
-    providerNote: `stub-mp3 · model=${ctx.model} · kind=${ctx.audioKind} · voice=${ctx.voice || '-'} · duration=${ctx.duration ?? '?'}s`,
-  };
-}
-
-function svgPlaceholder(ctx) {
-  const [w, h] = aspectToBox(ctx.aspect, 800);
-  const safe = (s) =>
-    String(s || '')
-      .replace(/&/g, '&amp;')
-      .replace(/</g, '&lt;')
-      .replace(/>/g, '&gt;');
-  return [
-    `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 ${w} ${h}" width="${w}" height="${h}">`,
-    `<rect width="${w}" height="${h}" fill="#0f1424"/>`,
-    `<text x="50%" y="50%" fill="#7da4ff" font-family="ui-sans-serif" font-size="20" text-anchor="middle">${safe(ctx.model)} — ${safe(ctx.prompt).slice(0, 60)}</text>`,
-    '</svg>',
-  ].join('');
-}
-
-function aspectToBox(aspect, base) {
-  const [a, b] = String(aspect || '1:1').split(':').map(Number);
-  if (!a || !b) return [base, base];
-  if (a >= b) return [base, Math.round((base * b) / a)];
-  return [Math.round((base * a) / b), base];
-}
-
-function silentWav(seconds) {
-  const sampleRate = 8000;
-  const numSamples = Math.max(1, Math.round(sampleRate * seconds));
-  const dataSize = numSamples * 2;
-  const buf = Buffer.alloc(44 + dataSize);
-  buf.write('RIFF', 0, 'ascii');
-  buf.writeUInt32LE(36 + dataSize, 4);
-  buf.write('WAVE', 8, 'ascii');
-  buf.write('fmt ', 12, 'ascii');
-  buf.writeUInt32LE(16, 16);
-  buf.writeUInt16LE(1, 20); // PCM
-  buf.writeUInt16LE(1, 22); // mono
-  buf.writeUInt32LE(sampleRate, 24);
-  buf.writeUInt32LE(sampleRate * 2, 28);
-  buf.writeUInt16LE(2, 32);
-  buf.writeUInt16LE(16, 34);
-  buf.write('data', 36, 'ascii');
-  buf.writeUInt32LE(dataSize, 40);
-  return buf;
-}
-
-function truncate(s, n) {
-  const v = String(s || '');
-  if (v.length <= n) return v;
-  return v.slice(0, n - 1) + '…';
-}
@@ -156,21 +156,6 @@ const EXT_MIME = {
  '.gif': 'image/gif',
  '.webp': 'image/webp',
  '.avif': 'image/avif',
-  // Video — covered MIMEs are the formats most generators emit. Browsers
-  // play them via <video> / <audio> in the FileViewer with no transcode.
-  '.mp4': 'video/mp4',
-  '.m4v': 'video/mp4',
-  '.webm': 'video/webm',
-  '.mov': 'video/quicktime',
-  // Audio — music / TTS generators commonly produce mp3 / wav / ogg /
-  // m4a; flac is rarer but cheap to support.
-  '.mp3': 'audio/mpeg',
-  '.wav': 'audio/wav',
-  '.ogg': 'audio/ogg',
-  '.oga': 'audio/ogg',
-  '.m4a': 'audio/mp4',
-  '.flac': 'audio/flac',
-  '.aac': 'audio/aac',
 };

 export function mimeFor(name) {
@@ -190,10 +175,6 @@ export function kindFor(name) {
    if (name.startsWith('sketch-')) return 'sketch';
    return 'image';
  }
-  if (['.mp4', '.m4v', '.webm', '.mov'].includes(ext)) return 'video';
-  if (['.mp3', '.wav', '.ogg', '.oga', '.m4a', '.flac', '.aac'].includes(ext)) {
-    return 'audio';
-  }
  if (['.md', '.txt'].includes(ext)) return 'text';
  if (['.js', '.mjs', '.cjs', '.ts', '.tsx', '.json', '.css'].includes(ext)) {
    return 'code';
@@ -6,7 +6,12 @@ import { fileURLToPath } from 'node:url';
 import path from 'node:path';
 import fs from 'node:fs';
 import os from 'node:os';
-import { detectAgents, getAgentDef } from './agents.js';
+import {
+  detectAgents,
+  getAgentDef,
+  isKnownModel,
+  sanitizeCustomModel,
+} from './agents.js';
 import { listSkills } from './skills.js';
 import { listDesignSystems, readDesignSystem } from './design-systems.js';
 import { createClaudeStreamHandler } from './claude-stream.js';
@@ -22,15 +27,6 @@ import {
  sanitizeName,
  writeProjectFile,
 } from './projects.js';
-import { generateMedia } from './media.js';
-import {
-  AUDIO_MODELS_BY_KIND,
-  IMAGE_MODELS,
-  VIDEO_MODELS,
-  MEDIA_ASPECTS,
-  VIDEO_LENGTHS_SEC,
-  AUDIO_DURATIONS_SEC,
-} from './media-models.js';
 import {
  deleteConversation,
  deleteProject as dbDeleteProject,
@@ -59,10 +55,6 @@ const PROJECT_ROOT = path.resolve(__dirname, '..');
 const STATIC_DIR = path.join(PROJECT_ROOT, 'dist');
 const SKILLS_DIR = path.join(PROJECT_ROOT, 'skills');
 const DESIGN_SYSTEMS_DIR = path.join(PROJECT_ROOT, 'design-systems');
-// Absolute path to the daemon CLI entry. We inject this into the spawned
-// agent's env as OD_BIN so the agent can run `node "$OD_BIN" media generate …`
-// regardless of whether the user has `od` on PATH.
-const OD_BIN_PATH = path.join(__dirname, 'cli.js');
 const ARTIFACTS_DIR = path.join(PROJECT_ROOT, '.od', 'artifacts');
 const PROJECTS_DIR = path.join(PROJECT_ROOT, '.od', 'projects');
 fs.mkdirSync(PROJECTS_DIR, { recursive: true });
@@ -663,56 +655,6 @@ export async function startServer({ port = 7456 } = {}) {
    }
  });

-  // ---- Media generation -----------------------------------------------------
-  //
-  // Surface-agnostic media dispatcher. The code agent reaches this via
-  // `od media generate` (see daemon/cli.js media subcommand), which is
-  // the unified contract: skills + metadata + system-prompt instruct the
-  // agent on WHAT to produce, the agent invokes ONE entrypoint that
-  // dispatches per (surface, model) and writes the bytes into the project.
-  // The shape of the response matches POST /api/projects/:id/files so the
-  // frontend can refresh the file list with the same code path.
-
-  app.get('/api/media/models', (_req, res) => {
-    res.json({
-      image: IMAGE_MODELS,
-      video: VIDEO_MODELS,
-      audio: AUDIO_MODELS_BY_KIND,
-      aspects: MEDIA_ASPECTS,
-      videoLengthsSec: VIDEO_LENGTHS_SEC,
-      audioDurationsSec: AUDIO_DURATIONS_SEC,
-    });
-  });
-
-  app.post('/api/projects/:id/media/generate', async (req, res) => {
-    try {
-      const projectId = req.params.id;
-      // Ensure the project exists in DB before writing files; this gives
-      // a friendly 404 when the agent calls with a bad id. The agent
-      // normally inherits OD_PROJECT_ID from spawn env so this should
-      // always resolve.
-      const project = getProject(db, projectId);
-      if (!project) return res.status(404).json({ error: 'project not found' });
-      const meta = await generateMedia({
-        projectsRoot: PROJECTS_DIR,
-        projectId,
-        surface: req.body?.surface,
-        model: req.body?.model,
-        prompt: req.body?.prompt,
-        output: req.body?.output,
-        aspect: req.body?.aspect,
-        length: typeof req.body?.length === 'number' ? req.body.length : undefined,
-        duration:
-          typeof req.body?.duration === 'number' ? req.body.duration : undefined,
-        voice: req.body?.voice,
-        audioKind: req.body?.audioKind,
-      });
-      res.json({ file: meta });
-    } catch (err) {
-      res.status(400).json({ error: String(err && err.message ? err.message : err) });
-    }
-  });
-
  // Multi-file upload that the chat composer uses for paste/drop/picker.
  // Files land flat in the project folder; the response carries the same
  // metadata as listFiles so the client can stage them as ChatAttachments
@@ -753,6 +695,8 @@ export async function startServer({ port = 7456 } = {}) {
      imagePaths = [],
      projectId,
      attachments = [],
+      model,
+      reasoning,
    } = req.body || {};
    const def = getAgentDef(agentId);
    if (!def) return res.status(400).json({ error: `unknown agent: ${agentId}` });
@@ -842,7 +786,23 @@ export async function startServer({ port = 7456 } = {}) {
    const extraAllowedDirs = [SKILLS_DIR, DESIGN_SYSTEMS_DIR].filter(
      (d) => fs.existsSync(d),
    );
-    const args = def.buildArgs(composed, safeImages, extraAllowedDirs);
+    // Resolve the model + reasoning the user picked in the model menu.
+    // A model id found in the most recent /api/agents listing (live or
+    // fallback) is used as-is; any other string is handed to the
+    // permissive sanitizer, which is the path for user-typed custom ids
+    // the CLI's listing didn't surface yet.
+    let safeModel = null;
+    if (typeof model === 'string') {
+      safeModel = isKnownModel(def, model) ? model : sanitizeCustomModel(model);
+    }
+    // Reasoning is accepted only when it matches a declared option id.
+    const reasoningChoices =
+      typeof reasoning === 'string' && Array.isArray(def.reasoningOptions)
+        ? def.reasoningOptions
+        : [];
+    const safeReasoning = reasoningChoices.find((r) => r.id === reasoning)?.id ?? null;
+    const agentOptions = { model: safeModel, reasoning: safeReasoning };
+    const args = def.buildArgs(composed, safeImages, extraAllowedDirs, agentOptions);

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache, no-transform');
@@ -861,22 +821,14 @@ export async function startServer({ port = 7456 } = {}) {
      streamFormat: def.streamFormat ?? 'plain',
      projectId: typeof projectId === 'string' ? projectId : null,
      cwd,
+      model: safeModel,
+      reasoning: safeReasoning,
    });

-    // Inject the OD context. Skills + the media-contract prompt tell the
-    // agent how to spend this — call `node "$OD_BIN" media generate
-    // --project "$OD_PROJECT_ID" …` and the daemon dispatches.
-    const odEnv = {
-      OD_BIN: OD_BIN_PATH,
-      OD_DAEMON_URL: `http://127.0.0.1:${port}`,
-      OD_PROJECT_ID: typeof projectId === 'string' ? projectId : '',
-      OD_PROJECT_DIR: cwd || '',
-    };
-
    let child;
    try {
      child = spawn(def.bin, args, {
-        env: { ...process.env, ...odEnv },
+        env: { ...process.env },
        stdio: ['ignore', 'pipe', 'pipe'],
        cwd: cwd || undefined,
      });
@@ -25,16 +25,12 @@ export async function listSkills(skillsRoot) {
      const { data, body } = parseFrontmatter(raw);
      const hasAttachments = await dirHasAttachments(dir);
      const mode = data.od?.mode || inferMode(body, data.description);
-      const surface = normalizeSurface(data.od?.surface, mode);
      out.push({
        id: data.name || entry.name,
        name: data.name || entry.name,
        description: data.description || "",
        triggers: Array.isArray(data.triggers) ? data.triggers : [],
        mode,
-        // Surface defaults to inferring from `mode` so legacy SKILL.md
-        // files (no `od.surface` declared) keep classifying correctly.
-        surface,
        platform: normalizePlatform(
          data.od?.platform,
          mode,
@@ -163,20 +159,6 @@ function inferMode(body, description) {
  return "prototype";
 }

-// Surface is the high-level output bucket — web, image, video or audio.
-// Authors can pin it via `od.surface`; otherwise we derive from `mode`,
-// then fall back to the safe default ('web') so existing skills classify
-// unchanged.
-const KNOWN_SURFACES = new Set(["web", "image", "video", "audio"]);
-function normalizeSurface(value, mode) {
-  if (typeof value === "string") {
-    const v = value.trim().toLowerCase();
-    if (KNOWN_SURFACES.has(v)) return v;
-  }
-  if (mode === "image" || mode === "video" || mode === "audio") return mode;
-  return "web";
-}
-
 // Validate platform tag — only desktop / mobile are meaningful for the
 // Examples gallery. Falls back to autodetecting "mobile" from descriptions
 // so legacy skills sort under the right pill without authoring changes.
@@ -1,121 +0,0 @@
---
-name: audio-jingle
-description: |
-  Audio generation skill — jingles, beds, voiceover, and sound effects.
-  Routes music requests to Suno V5 / Udio / Lyria, speech to MiniMax
-  TTS / FishAudio / ElevenLabs V3, and SFX to ElevenLabs SFX or
-  AudioCraft. Output is one MP3/WAV file saved to the project folder.
-triggers:
-  - "music"
-  - "jingle"
-  - "bed"
-  - "voiceover"
-  - "tts"
-  - "sound effect"
-  - "音乐"
-  - "配音"
-  - "音效"
-od:
-  mode: audio
-  surface: audio
-  scenario: marketing
-  preview:
-    type: html
-    entry: example.html
-  design_system:
-    requires: false
-  example_prompt: |
-    A 30-second upbeat indie-pop jingle for a coffee shop launch — warm
-    electric piano lead, brushed drums, gentle bass, a single sun-soaked
-    "ahhh" choir on the chorus. No vocals. Loop-friendly tail.
---
-
-# Audio Jingle Skill
-
-Three sub-modes. The active project's `audioKind` decides which one
-runs:
-
-| `audioKind` | Models we route to | Plan focus |
-|---|---|---|
-| `music` | Suno V5 (default), Udio, Lyria 2 | genre + tempo + instrumentation |
-| `speech` | MiniMax TTS (default), Fish, ElevenLabs V3 | script + voice + pacing |
-| `sfx` | ElevenLabs SFX (default), AudioCraft | texture + impact + duration |
-
-## Resource map
-
-```
-audio-jingle/
-├── SKILL.md
-└── example.html
-```
-
-## Workflow
-
-### Step 0 — Read the project metadata
-
-`audioKind`, `audioModel`, `audioDuration` (seconds), and (for speech)
-`voice`. Branch by `audioKind` and use the values verbatim — no
-clarifying form unless something is marked `(unknown — ask)`.
-
-### Step 1 — Plan
-
-**Music**
- Genre + reference artists (1-2)
- Tempo (BPM) + key
- Instrumentation (3-5 instruments max)
- Vocals: yes / no / hummed / choir
- Mood arc (intro → chorus → outro)
-
-**Speech**
- Script (final, not draft — TTS runs verbatim)
- Voice description (warmth, age, accent, pacing)
- Pronunciation hints for proper nouns / acronyms
-
-**SFX**
- Texture (impact / whoosh / ambience / foley)
- Duration + envelope (sharp attack vs. gentle swell)
- Layering note (single hit vs. stacked)
-
-State the plan in 2-3 sentences before dispatching.
-
-### Step 2 — Compose the prompt
-
-Use the format the upstream model prefers. Bind `audioDuration` to the
-API parameter directly; never put "make it 30 seconds" in prose.
-
-### Step 3 — Dispatch via the media contract
-
-Use the unified dispatcher — do **not** call provider APIs by hand:
-
-```bash
-node "$OD_BIN" media generate \
-  --project "$OD_PROJECT_ID" \
-  --surface audio \
-  --audio-kind "<music|speech|sfx>" \
-  --model "<audioModel from metadata>" \
-  --duration <audioDuration seconds> \
-  --voice "<voice (speech only)>" \
-  --output "<short-slug>-<duration>s.mp3" \
-  --prompt "<assembled prompt from Step 2 — for speech, the literal script>"
-```
-
-The command prints one line of JSON: `{"file": {"name": "...", ...}}`.
-The bytes land in the project; the FileViewer renders the audio
-transport controls automatically.
-
-### Step 4 — Hand off
-
-Reply with: plan summary, the filename returned by the dispatcher, and
-one sentence on what to try if the user wants a variation (e.g. "swap
-tempo from 92 to 108 BPM" rather than "make it different").
-
-## Hard rules
-
- TTS runs your script **literally**. Proof it before dispatching —
-  even one stray comma changes the cadence.
- Music: under 30s = single section; 30–90s = intro + body; 90s+ =
-  full arc. Don't try to fit a 3-act song into 15 seconds.
- SFX: prefer one well-described layer over a paragraph of "make it
-  cool" — generators reward specific texture words.
- Save the file every turn. The audio viewer shows transport controls
-  the moment the file lands.
@@ -1,128 +0,0 @@
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <title>Audio jingle — example</title>
-    <style>
-      :root {
-        --bg: #f5efe5;
-        --panel: #ffffff;
-        --ink: #1c1b1a;
-        --muted: #8b8579;
-        --accent: #c96442;
-        --grid: #e6dfd1;
-      }
-      * { box-sizing: border-box; }
-      html, body { margin: 0; padding: 0; background: var(--bg); color: var(--ink);
-        font-family: 'Iowan Old Style', 'Charter', Georgia, serif; }
-      body { min-height: 100dvh; display: grid; place-items: center; padding: 32px; }
-      .card {
-        width: min(640px, 92vw);
-        background: var(--panel);
-        border-radius: 8px;
-        padding: 26px 28px 22px;
-        box-shadow: 0 16px 40px rgba(28,27,26,0.10), 0 1px 2px rgba(28,27,26,0.05);
-        border: 1px solid rgba(28,27,26,0.06);
-      }
-      .row1 { display: flex; align-items: center; gap: 14px; margin-bottom: 18px; }
-      .icon {
-        width: 44px; height: 44px; border-radius: 50%;
-        background: var(--accent); color: #fff;
-        display: grid; place-items: center;
-        box-shadow: 0 6px 18px rgba(201, 100, 66, 0.35);
-      }
-      .icon svg { width: 22px; height: 22px; }
-      .title { margin: 0; font-size: 20px; line-height: 1.2; }
-      .sub { font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 11px; color: var(--muted); letter-spacing: 0.14em; text-transform: uppercase; margin-top: 2px; }
-
-      .wave {
-        display: flex; align-items: end; gap: 3px;
-        height: 96px; padding: 0 4px;
-        border-top: 1px dashed var(--grid);
-        border-bottom: 1px dashed var(--grid);
-      }
-      .wave span {
-        flex: 1; background: linear-gradient(180deg, var(--accent), #a4502f);
-        border-radius: 2px;
-        animation: bob 2s ease-in-out infinite;
-        animation-delay: var(--d, 0s);
-      }
-      @keyframes bob {
-        0%, 100% { height: var(--h, 30%); }
-        50% { height: calc(var(--h, 30%) * 1.6); }
-      }
-
-      .transport {
-        margin-top: 14px;
-        display: grid; grid-template-columns: auto 1fr auto auto; gap: 12px;
-        align-items: center;
-      }
-      .play {
-        width: 36px; height: 36px; border-radius: 50%;
-        background: var(--ink); color: #fff;
-        display: grid; place-items: center;
-      }
-      .timeline {
-        height: 4px; border-radius: 2px;
-        background: linear-gradient(90deg, var(--accent) 0 32%, var(--grid) 32% 100%);
-      }
-      .time {
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 11px; color: var(--muted);
-        letter-spacing: 0.08em;
-      }
-      .badge {
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 10px; color: var(--accent);
-        letter-spacing: 0.18em; text-transform: uppercase;
-        padding: 4px 8px; border-radius: 999px;
-        background: rgba(201, 100, 66, 0.1);
-      }
-    </style>
-  </head>
-  <body>
-    <div class="card">
-      <div class="row1">
-        <div class="icon" aria-hidden>
-          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><path d="M9 18V5l12-2v13"/><circle cx="6" cy="18" r="3"/><circle cx="18" cy="16" r="3"/></svg>
-        </div>
-        <div>
-          <h1 class="title">A 30s coffee-shop launch jingle.</h1>
-          <div class="sub">suno-v5 · 92 BPM · loop-friendly tail</div>
-        </div>
-      </div>
-      <div class="wave" aria-hidden>
-        <span style="--h:24%;--d:0s"></span>
-        <span style="--h:38%;--d:.05s"></span>
-        <span style="--h:52%;--d:.1s"></span>
-        <span style="--h:64%;--d:.15s"></span>
-        <span style="--h:48%;--d:.2s"></span>
-        <span style="--h:70%;--d:.25s"></span>
-        <span style="--h:42%;--d:.3s"></span>
-        <span style="--h:58%;--d:.35s"></span>
-        <span style="--h:36%;--d:.4s"></span>
-        <span style="--h:62%;--d:.45s"></span>
-        <span style="--h:26%;--d:.5s"></span>
-        <span style="--h:50%;--d:.55s"></span>
-        <span style="--h:34%;--d:.6s"></span>
-        <span style="--h:46%;--d:.65s"></span>
-        <span style="--h:58%;--d:.7s"></span>
-        <span style="--h:30%;--d:.75s"></span>
-        <span style="--h:44%;--d:.8s"></span>
-        <span style="--h:54%;--d:.85s"></span>
-        <span style="--h:28%;--d:.9s"></span>
-        <span style="--h:48%;--d:.95s"></span>
-      </div>
-      <div class="transport">
-        <div class="play" aria-hidden>
-          <svg viewBox="0 0 24 24" width="14" height="14" fill="currentColor"><path d="M6 4v16l14-8z"/></svg>
-        </div>
-        <div class="timeline" aria-hidden></div>
-        <span class="time">00:09 / 00:30</span>
-        <span class="badge">MP3</span>
-      </div>
-    </div>
-  </body>
-</html>
@@ -1,104 +0,0 @@
---
-name: image-poster
-description: |
-  Single-image generation skill for posters, key art, and editorial
-  illustrations. Defaults to gpt-image-2 but is provider-agnostic — the
-  same workflow drives Flux, Imagen, or Midjourney via the active
-  upstream tooling. Output is one or more PNG/JPEG files saved to the
-  project folder.
-triggers:
-  - "poster"
-  - "key art"
-  - "illustration"
-  - "image"
-  - "cover art"
-  - "海报"
-  - "插画"
-od:
-  mode: image
-  surface: image
-  scenario: design
-  preview:
-    type: html
-    entry: example.html
-  design_system:
-    requires: false
-  example_prompt: |
-    Editorial poster for an indie film festival — one bold abstract
-    silhouette over a warm, slightly grainy paper background; hand-set
-    sans serif title at the top, festival dates and venue at the bottom
-    in monospace. Muted ochre + ink palette.
---
-
-# Image Poster Skill
-
-Produce **one** finished image asset per turn unless the user asks for
-variations. Image generation rewards a tight, structured prompt — your
-job is to assemble that prompt from the user's brief, then dispatch.
-
-## Resource map
-
-```
-image-poster/
-├── SKILL.md         ← you're reading this
-└── example.html     ← what the resulting card looks like in Examples
-```
-
-## Workflow
-
-### Step 0 — Read the project metadata
-
-The active project carries `imageModel`, `imageAspect`, and (optional)
-`imageStyle` notes. Use them as the upstream model + canvas + style
-anchor; only ask the user to fill them in if they're marked `(unknown
-— ask)`.
-
-### Step 1 — Compose the prompt
-
-Plan in this exact order before calling any tool:
-
-1. **Subject + composition** — what is in the frame, where, at what
-   scale; eye-line and crop.
-2. **Lighting + mood** — natural / studio / moody; warm / cool; key
-   plus rim plus fill; time of day if outdoor.
-3. **Palette + textures** — hex anchors when the user gave a brand
-   palette; otherwise a 3-word mood tag (e.g. "muted ochre + ink").
-4. **Camera / lens** — only if the user wants photographic realism
-   ("85mm portrait, shallow DOF") or a specific film stock.
-5. **What to avoid** — common AI-slop patterns ("no extra fingers, no
-   warped text, no logo placeholders").
-
-### Step 2 — Dispatch via the media contract
-
-Use the unified dispatcher — do **not** call upstream provider APIs by
-hand. Run from your shell tool:
-
-```bash
-node "$OD_BIN" media generate \
-  --project "$OD_PROJECT_ID" \
-  --surface image \
-  --model "<imageModel from metadata>" \
-  --aspect "<imageAspect from metadata>" \
-  --output "<short-descriptive-name>.png" \
-  --prompt "<the full assembled prompt from Step 1>"
-```
-
-The command prints one line of JSON: `{"file": {"name": "...", ...}}`.
-The daemon writes the bytes into the project folder; the FileViewer
-picks it up automatically.
-
-### Step 3 — Hand off
-
-Reply with a one-paragraph summary of the prompt you used and the
-filename returned by the dispatcher (e.g. *I generated `hero-poster.png`
-with `gpt-image-2` at 1:1.*). Do **not** emit an `<artifact>` tag.
-
-## Hard rules
-
- One image per turn unless asked for variations.
- Honor `imageAspect` exactly — the upstream cost is the same; matching
-  the aspect avoids a re-render.
- No filler typography in the image itself unless the user asked for
-  in-frame text. Real copy beats lorem.
- Save every render — never describe an image without producing the
-  file. The user expects something to open in the file viewer.
@@ -1,113 +0,0 @@
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <title>Image poster — example</title>
-    <style>
-      :root {
-        --bg: #f5efe5;
-        --ink: #1c1b1a;
-        --accent: #c96442;
-        --muted: #8b8579;
-        --paper: #efe7d7;
-      }
-      * { box-sizing: border-box; }
-      html, body { margin: 0; padding: 0; background: var(--bg); color: var(--ink);
-        font-family: 'Iowan Old Style', 'Charter', Georgia, serif; }
-      body { min-height: 100dvh; display: grid; place-items: center; padding: 32px; }
-      .poster {
-        width: min(640px, 92vw);
-        aspect-ratio: 3 / 4;
-        background: var(--paper);
-        border: 1px solid rgba(28, 27, 26, 0.08);
-        border-radius: 6px;
-        box-shadow: 0 16px 48px rgba(28, 27, 26, 0.12), 0 1px 2px rgba(28, 27, 26, 0.06);
-        display: grid;
-        grid-template-rows: auto 1fr auto;
-        padding: 38px 32px;
-        position: relative;
-        overflow: hidden;
-      }
-      .poster::after {
-        content: '';
-        position: absolute; inset: 0;
-        pointer-events: none;
-        background:
-          radial-gradient(circle at 30% 18%, rgba(255,255,255,0.7), transparent 60%),
-          repeating-linear-gradient(0deg, rgba(28,27,26,0.025) 0 1px, transparent 1px 2px);
-      }
-      .eyebrow {
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 11px;
-        letter-spacing: 0.18em;
-        text-transform: uppercase;
-        color: var(--muted);
-        display: flex;
-        justify-content: space-between;
-        align-items: center;
-      }
-      .accent-dot {
-        width: 8px; height: 8px; border-radius: 50%;
-        background: var(--accent);
-      }
-      .silhouette {
-        align-self: center;
-        justify-self: center;
-        width: 70%;
-        aspect-ratio: 1 / 1;
-        position: relative;
-      }
-      .silhouette svg { width: 100%; height: 100%; display: block; }
-      .meta {
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 10.5px;
-        letter-spacing: 0.14em;
-        text-transform: uppercase;
-        color: var(--muted);
-        display: grid;
-        grid-template-columns: 1fr auto 1fr;
-        gap: 12px;
-        align-items: end;
-      }
-      .meta strong { color: var(--ink); font-weight: 600; }
-      .title {
-        font-size: 44px;
-        line-height: 0.95;
-        margin: 18px 0 0;
-        letter-spacing: -0.01em;
-      }
-      .title em { font-style: italic; color: var(--accent); }
-      .footer {
-        margin-top: 12px;
-        font-size: 13px;
-        color: var(--muted);
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-      }
-    </style>
-  </head>
-  <body>
-    <div class="poster">
-      <div class="eyebrow">
-        <span>Open Design · Image</span>
-        <span class="accent-dot" aria-hidden></span>
-      </div>
-      <div class="silhouette" aria-hidden>
-        <svg viewBox="0 0 100 100">
-          <circle cx="50" cy="38" r="18" fill="#1c1b1a" />
-          <path d="M22 100 C 22 70, 78 70, 78 100 Z" fill="#1c1b1a" />
-          <circle cx="68" cy="22" r="6" fill="#c96442" />
-        </svg>
-      </div>
-      <div>
-        <h1 class="title">An <em>image</em> project<br />produced by the agent.</h1>
-        <div class="meta">
-          <span><strong>gpt-image-2</strong></span>
-          <span>·</span>
-          <span style="text-align:right">3:4 · poster</span>
-        </div>
-        <p class="footer">Saved as PNG into the project folder.</p>
-      </div>
-    </div>
-  </body>
-</html>
@@ -1,108 +0,0 @@
---
-name: video-shortform
-description: |
-  Short-form video generation skill — 3-10 second clips for product
-  reveals, motion teasers, ambient loops. Defaults to Seedance 2 but
-  works the same with Kling 3 / 4, Veo 3 or Sora 2. Output is one MP4
-  saved to the project folder. When the workspace also ships an
-  interactive-video / hyperframes skill, prefer composing several short
-  shots into a single timeline rather than one long monolithic clip.
-triggers:
-  - "video"
-  - "clip"
-  - "shortform"
-  - "reel"
-  - "短视频"
-  - "动效"
-od:
-  mode: video
-  surface: video
-  scenario: marketing
-  preview:
-    type: html
-    entry: example.html
-  design_system:
-    requires: false
-  example_prompt: |
-    5-second product reveal — ceramic coffee mug rotating on a soft
-    paper backdrop, warm side-light from camera-left, micro dust motes
-    drifting through the beam. Cinematic, 16:9, slow drift on the camera.
---
-
-# Video Shortform Skill
-
-Short-form (≤ 10s) is the sweet spot for current text-to-video models —
-they're great at one **shot** with one **idea**, weaker at multi-cut
-narratives. Plan one shot per call.
-
-## Resource map
-
-```
-video-shortform/
-├── SKILL.md
-└── example.html
-```
-
-## Workflow
-
-### Step 0 — Read the project metadata
-
-`videoModel`, `videoLength` (seconds), `videoAspect`. These are
-hard-locks — clamp the prompt to whatever the chosen model supports
-(Seedance 2 caps at 10s; Kling 4 supports up to 10s + image-to-video;
-Veo 3 supports 8s with audio).
-
-### Step 1 — Plan the shot
-
-Write the shotlist BEFORE calling the model:
-
-| Slot | Content |
-|---|---|
-| Subject | What's in frame? |
-| Camera | Static / pan / push-in / orbit? |
-| Lighting | Key direction + temperature |
-| Motion | What moves, at what pace? Subject motion vs camera motion. |
-| Sound | Ambient bed? (only if the model supports audio) |
-
-Show this to the user as a one-sentence plan before dispatching — they
-can redirect cheaply.
-
-### Step 2 — Compose the prompt
-
-Use the format the upstream model prefers (Seedance: motion + camera +
-mood; Kling: subject + camera + style; Veo: subject + cinematography +
-sound). Bind the project's `videoAspect` and `videoLength` directly to
-the API parameters; never put them in prose.
-
-### Step 3 — Dispatch via the media contract
-
-Use the unified dispatcher — do **not** call provider APIs by hand:
-
-```bash
-node "$OD_BIN" media generate \
-  --project "$OD_PROJECT_ID" \
-  --surface video \
-  --model "<videoModel from metadata>" \
-  --aspect "<videoAspect from metadata>" \
-  --length <videoLength seconds> \
-  --output "<short-slug>-<seconds>s.mp4" \
-  --prompt "<assembled shot prompt from Step 2>"
-```
-
-The command prints one line of JSON: `{"file": {"name": "...", ...}}`.
-The bytes land in the project; the FileViewer plays it automatically.
-
-### Step 4 — Hand off
-
-Reply with: shot summary, the filename returned by the dispatcher, and
-one sentence on what to try if the user wants a variation.
-
-## Hard rules
-
- One shot per turn. Multi-shot timelines belong in a hyperframes /
-  interactive-video skill, not here.
- Match `videoAspect` exactly — re-renders are slow.
- Never ship a video without saving the file — the user expects
-  something to play in the file viewer.
- When the underlying model fails (NSFW filter, content policy,
-  timeout), report the error verbatim. Don't silently retry.
@@ -1,90 +0,0 @@
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1" />
-    <title>Short-form video — example</title>
-    <style>
-      :root {
-        --bg: #0e0d0c;
-        --panel: #1a1816;
-        --ink: #f5efe5;
-        --muted: #8b8579;
-        --accent: #c96442;
-      }
-      * { box-sizing: border-box; }
-      html, body { margin: 0; padding: 0; background: var(--bg); color: var(--ink);
-        font-family: 'Iowan Old Style', 'Charter', Georgia, serif; }
-      body { min-height: 100dvh; display: grid; place-items: center; padding: 32px; }
-      .stage {
-        width: min(720px, 92vw);
-        background: var(--panel);
-        border-radius: 8px;
-        padding: 22px;
-        box-shadow: 0 24px 60px rgba(0,0,0,0.45);
-      }
-      .frame {
-        position: relative;
-        aspect-ratio: 16 / 9;
-        border-radius: 6px;
-        overflow: hidden;
-        background:
-          radial-gradient(circle at 30% 35%, #d8b08b 0%, #6f4a35 40%, #1a120c 80%);
-      }
-      .frame::after {
-        content: ''; position: absolute; inset: 0;
-        background: repeating-linear-gradient(0deg, rgba(0,0,0,0.18) 0 1px, transparent 1px 4px);
-        pointer-events: none;
-        animation: scan 12s linear infinite;
-      }
-      @keyframes scan { from { background-position-y: 0; } to { background-position-y: 200px; } }
-      .frame .mug {
-        position: absolute; left: 50%; top: 56%; transform: translate(-50%, -50%);
-        width: 28%; aspect-ratio: 1 / 1;
-        background: radial-gradient(ellipse at 35% 35%, #f5efe5 0%, #c2b8a7 50%, #6f6757 100%);
-        border-radius: 18% 18% 22% 22% / 28% 28% 18% 18%;
-        box-shadow: 18px 6px 30px rgba(0,0,0,0.45);
-        animation: turn 6s ease-in-out infinite alternate;
-      }
-      .frame .mug::after {
-        content: ''; position: absolute; right: -14%; top: 28%;
-        width: 18%; height: 44%;
-        border: 6px solid #c2b8a7; border-left: none; border-radius: 0 100% 100% 0 / 0 50% 50% 0;
-      }
-      @keyframes turn { from { transform: translate(-50%, -50%) rotate(-6deg); } to { transform: translate(-50%, -50%) rotate(6deg); } }
-      .frame .timecode {
-        position: absolute; left: 14px; bottom: 12px;
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 11px; letter-spacing: 0.16em;
-        color: var(--muted);
-        background: rgba(0,0,0,0.4);
-        padding: 4px 8px; border-radius: 999px;
-      }
-      .frame .badge {
-        position: absolute; left: 14px; top: 12px;
-        font-family: ui-monospace, 'SF Mono', Menlo, monospace;
-        font-size: 10.5px; letter-spacing: 0.2em; text-transform: uppercase;
-        color: var(--accent);
-      }
-      .meta {
-        display: grid; grid-template-columns: 1fr auto; gap: 10px;
-        align-items: end; margin-top: 18px;
-      }
-      .title { font-size: 22px; line-height: 1.1; margin: 0; }
-      .sub { font-family: ui-monospace, 'SF Mono', Menlo, monospace; font-size: 11px; color: var(--muted); letter-spacing: 0.14em; text-transform: uppercase; }
-    </style>
-  </head>
-  <body>
-    <div class="stage">
-      <div class="frame">
-        <span class="badge">● REC</span>
-        <div class="mug" aria-hidden></div>
-        <span class="timecode">00:05 · 16:9 · seedance-2</span>
-      </div>
-      <div class="meta">
-        <h1 class="title">A 5-second product reveal — saved as MP4.</h1>
-        <span class="sub">Open Design · Video</span>
-      </div>
-    </div>
-  </body>
-</html>
@@ -137,6 +137,18 @@ export function App() {
    [config],
  );

+  const handleAgentModelChange = useCallback(
+    (agentId: string, choice: { model?: string; reasoning?: string }) => {
+      // Overlay the pick on the agent's stored choice so untouched fields survive.
+      const stored = config.agentModels?.[agentId] ?? {};
+      const agentModels = { ...config.agentModels, [agentId]: { ...stored, ...choice } };
+      const updated = { ...config, agentModels };
+      saveConfig(updated);
+      setConfig(updated);
+    },
+    [config],
+  );
+
  const handleChangeDefaultDesignSystem = useCallback(
    (designSystemId: string) => {
      const next = { ...config, designSystemId };
@@ -272,6 +284,7 @@ export function App() {
          daemonLive={daemonLive}
          onModeChange={handleModeChange}
          onAgentChange={handleAgentChange}
+          onAgentModelChange={handleAgentModelChange}
          onRefreshAgents={refreshAgents}
          onOpenSettings={openSettings}
          onBack={handleBack}
@@ -2,6 +2,7 @@ import { useEffect, useMemo, useRef, useState } from 'react';
 import { useT } from '../i18n';
 import { AgentIcon } from './AgentIcon';
 import { Icon } from './Icon';
+import { renderModelOptions } from './modelOptions';
 import type { AgentInfo, AppConfig, ExecMode } from '../types';

 interface Props {
@@ -10,6 +11,10 @@ interface Props {
  daemonLive: boolean;
  onModeChange: (mode: ExecMode) => void;
  onAgentChange: (id: string) => void;
+  onAgentModelChange: (
+    id: string,
+    choice: { model?: string; reasoning?: string },
+  ) => void;
  onOpenSettings: () => void;
  onRefreshAgents: () => void;
  onBack?: () => void;
@@ -26,6 +31,7 @@ export function AvatarMenu({
  daemonLive,
  onModeChange,
  onAgentChange,
+  onAgentModelChange,
  onOpenSettings,
  onRefreshAgents,
  onBack,
@@ -58,6 +64,19 @@ export function AvatarMenu({

  const installedAgents = agents.filter((a) => a.available);

+  // Resolve the active agent's stored model + reasoning pick. With nothing
+  // stored we fall back to the first listed option so the selects aren't
+  // empty. NOTE(review): a custom id (not in `models`) yields no label here.
+  const currentChoice =
+    (config.agentId && config.agentModels?.[config.agentId]) || {};
+  const currentModelId =
+    currentChoice.model ?? currentAgent?.models?.[0]?.id ?? null;
+  const currentReasoningId =
+    currentChoice.reasoning ?? currentAgent?.reasoningOptions?.[0]?.id ?? null;
+  const currentModelLabel = currentAgent?.models?.find(
+    (m) => m.id === currentModelId,
+  )?.label;
+
  return (
    <div className="avatar-menu" ref={wrapRef}>
      <button
@@ -88,7 +107,7 @@ export function AvatarMenu({
              {config.mode === 'api'
                ? safeHost(config.baseUrl)
                : currentAgent
-                  ? `${currentAgent.name}${currentAgent.version ? ` · ${currentAgent.version}` : ''}`
+                  ? `${currentAgent.name}${currentAgent.version ? ` · ${currentAgent.version}` : ''}${currentModelLabel && currentModelId !== 'default' ? ` · ${currentModelLabel}` : ''}`
                  : t('avatar.noAgentSelected')}
            </span>
          </div>
@@ -133,18 +152,7 @@ export function AvatarMenu({

          {config.mode === 'daemon' && installedAgents.length > 0 ? (
            <>
-              <div
-                style={{
-                  fontSize: 10.5,
-                  textTransform: 'uppercase',
-                  letterSpacing: '0.06em',
-                  color: 'var(--text-faint)',
-                  fontWeight: 600,
-                  padding: '8px 10px 4px',
-                }}
-              >
-                {t('avatar.codeAgent')}
-              </div>
+              <div className="avatar-section-label">{t('avatar.codeAgent')}</div>
              {installedAgents.map((a) => (
                <button
                  type="button"
@@ -152,7 +160,8 @@ export function AvatarMenu({
                  className="avatar-item"
                  onClick={() => {
                    onAgentChange(a.id);
-                    setOpen(false);
+                    // Keep the popover open so the user can immediately
+                    // pick a model for the agent they just chose.
                  }}
                >
                  <AgentIcon id={a.id} size={18} />
@@ -166,6 +175,71 @@ export function AvatarMenu({
                  ) : null}
                </button>
              ))}
+              {currentAgent &&
+              currentAgent.available &&
+              ((currentAgent.models && currentAgent.models.length > 0) ||
+                (currentAgent.reasoningOptions &&
+                  currentAgent.reasoningOptions.length > 0)) ? (
+                <div className="avatar-model-section">
+                  <div className="avatar-section-label">
+                    {t('avatar.modelSection')}
+                  </div>
+                  {currentAgent.models && currentAgent.models.length > 0 ? (
+                    <label className="avatar-select-row">
+                      <span className="avatar-select-label">
+                        {t('avatar.modelLabel')}
+                      </span>
+                      <select
+                        className="avatar-select"
+                        value={currentModelId ?? ''}
+                        onChange={(e) =>
+                          onAgentModelChange(currentAgent.id, {
+                            model: e.target.value,
+                          })
+                        }
+                      >
+                        {renderModelOptions(currentAgent.models)}
+                        {/* When the user has typed a custom id in
+                            Settings, surface it here too so the dropdown
+                            actually shows the active selection rather
+                            than collapsing to "Default". */}
+                        {currentModelId &&
+                        !currentAgent.models.some(
+                          (m) => m.id === currentModelId,
+                        ) ? (
+                          <option value={currentModelId}>
+                            {currentModelId}{' '}
+                            {t('avatar.customSuffix')}
+                          </option>
+                        ) : null}
+                      </select>
+                    </label>
+                  ) : null}
+                  {currentAgent.reasoningOptions &&
+                  currentAgent.reasoningOptions.length > 0 ? (
+                    <label className="avatar-select-row">
+                      <span className="avatar-select-label">
+                        {t('avatar.reasoningLabel')}
+                      </span>
+                      <select
+                        className="avatar-select"
+                        value={currentReasoningId ?? ''}
+                        onChange={(e) =>
+                          onAgentModelChange(currentAgent.id, {
+                            reasoning: e.target.value,
+                          })
+                        }
+                      >
+                        {currentAgent.reasoningOptions.map((r) => (
+                          <option key={r.id} value={r.id}>
+                            {r.label}
+                          </option>
+                        ))}
+                      </select>
+                    </label>
+                  ) : null}
+                </div>
+              ) : null}
              <button
                type="button"
                className="avatar-item"
@@ -1,8 +1,6 @@
 import { useMemo, useState } from 'react';
 import { useT } from '../i18n';
-import type { Dict } from '../i18n/types';
-import type { DesignSystemSummary, Surface } from '../types';
-import { Icon } from './Icon';
+import type { DesignSystemSummary } from '../types';

 interface Props {
  systems: DesignSystemSummary[];
@@ -11,20 +9,6 @@ interface Props {
  onPreview: (id: string) => void;
 }

-type SurfaceFilter = 'all' | Surface;
-
-const SURFACE_PILLS: { value: SurfaceFilter; labelKey: keyof Dict; icon: 'grid' | 'image' | 'video' | 'music' | null }[] = [
-  { value: 'all', labelKey: 'common.all', icon: null },
-  { value: 'web', labelKey: 'ds.surfaceWeb', icon: 'grid' },
-  { value: 'image', labelKey: 'ds.surfaceImage', icon: 'image' },
-  { value: 'video', labelKey: 'ds.surfaceVideo', icon: 'video' },
-  { value: 'audio', labelKey: 'ds.surfaceAudio', icon: 'music' },
-];
-
-function surfaceOf(system: DesignSystemSummary): Surface {
-  return system.surface ?? 'web';
-}
-
 const CATEGORY_ORDER = [
  'Starter',
  'AI & LLM',
@@ -42,43 +26,19 @@ export function DesignSystemsTab({ systems, selectedId, onSelect, onPreview }: P
  const t = useT();
  const [filter, setFilter] = useState('');
  const [category, setCategory] = useState<string>('All');
-  const [surfaceFilter, setSurfaceFilter] = useState<SurfaceFilter>('all');
-
-  // Pre-scope by surface so the category dropdown only lists categories
-  // that exist within the active surface — avoids ghost options that
-  // would yield zero rows.
-  const surfaceScoped = useMemo(
-    () =>
-      surfaceFilter === 'all'
-        ? systems
-        : systems.filter((s) => surfaceOf(s) === surfaceFilter),
-    [systems, surfaceFilter],
-  );
-
-  const surfaceCounts = useMemo(() => {
-    const counts: Record<SurfaceFilter, number> = {
-      all: systems.length,
-      web: 0,
-      image: 0,
-      video: 0,
-      audio: 0,
-    };
-    for (const s of systems) counts[surfaceOf(s)]++;
-    return counts;
-  }, [systems]);

  const categories = useMemo(() => {
    const cats = new Set<string>();
-    for (const s of surfaceScoped) cats.add(s.category || 'Uncategorized');
+    for (const s of systems) cats.add(s.category || 'Uncategorized');
    const ordered: string[] = [];
    for (const c of CATEGORY_ORDER) if (cats.has(c)) ordered.push(c);
    for (const c of [...cats].sort()) if (!ordered.includes(c)) ordered.push(c);
    return ['All', ...ordered];
-  }, [surfaceScoped]);
+  }, [systems]);

  const filtered = useMemo(() => {
    const q = filter.trim().toLowerCase();
-    return surfaceScoped.filter((s) => {
+    return systems.filter((s) => {
      if (category !== 'All' && (s.category || 'Uncategorized') !== category) return false;
      if (!q) return true;
      return (
@@ -86,7 +46,7 @@ export function DesignSystemsTab({ systems, selectedId, onSelect, onPreview }: P
        s.summary.toLowerCase().includes(q)
      );
    });
-  }, [surfaceScoped, filter, category]);
+  }, [systems, filter, category]);

  // The category metadata coming from each design system is authored in
  // English. We translate the well-known buckets (All / Uncategorized) but
@@ -100,30 +60,6 @@ export function DesignSystemsTab({ systems, selectedId, onSelect, onPreview }: P

  return (
    <div className="tab-panel">
-      <div
-        className="examples-filter-row"
-        role="tablist"
-        aria-label={t('ds.surfaceLabel')}
-      >
-        <span className="examples-filter-label">{t('ds.surfaceLabel')}</span>
-        {SURFACE_PILLS.map((p) => (
-          <button
-            key={p.value}
-            type="button"
-            role="tab"
-            aria-selected={surfaceFilter === p.value}
-            className={`filter-pill ${surfaceFilter === p.value ? 'active' : ''}`}
-            onClick={() => {
-              setSurfaceFilter(p.value);
-              setCategory('All');
-            }}
-          >
-            {p.icon ? <Icon name={p.icon} size={12} /> : null}
-            {t(p.labelKey)}
-            <span className="filter-pill-count">{surfaceCounts[p.value]}</span>
-          </button>
-        ))}
-      </div>
      <div className="tab-panel-toolbar">
        <input
          placeholder={t('ds.searchPlaceholder')}
@@ -330,30 +330,6 @@ function metadataForSkill(skill: SkillSummary): ProjectMetadata {
        typeof skill.animations === 'boolean' ? skill.animations : false,
    };
  }
-  // Media surfaces — defaults match the new-project form so the
-  // 'Use this prompt' fast-create produces sensible metadata even
-  // when the SKILL.md doesn't pin a specific model. Skills can pin
-  // a model later via `od.image_model` etc.; for now we fall back to
-  // the surface's first default.
-  if (kind === 'image') {
-    return { kind, imageModel: 'gpt-image-2', imageAspect: '1:1' };
-  }
-  if (kind === 'video') {
-    return {
-      kind,
-      videoModel: 'seedance-2',
-      videoLength: 5,
-      videoAspect: '16:9',
-    };
-  }
-  if (kind === 'audio') {
-    return {
-      kind,
-      audioKind: 'music',
-      audioModel: 'suno-v5',
-      audioDuration: 30,
-    };
-  }
  return { kind: 'other' };
 }

@@ -361,8 +337,5 @@ function kindForSkill(skill: SkillSummary): ProjectKind {
  if (skill.mode === 'deck') return 'deck';
  if (skill.mode === 'prototype') return 'prototype';
  if (skill.mode === 'template') return 'template';
-  if (skill.mode === 'image') return 'image';
-  if (skill.mode === 'video') return 'video';
-  if (skill.mode === 'audio') return 'audio';
  return 'other';
 }
@@ -4,8 +4,7 @@ import type { Dict } from '../i18n/types';
 import { fetchSkillExample } from '../providers/registry';
 import { exportAsHtml, exportAsPdf, exportAsZip } from '../runtime/exports';
 import { buildSrcdoc } from '../runtime/srcdoc';
-import type { SkillSummary, Surface } from '../types';
-import { Icon } from './Icon';
+import type { SkillSummary } from '../types';
 import { PreviewModal } from './PreviewModal';

 type TranslateFn = (key: keyof Dict, vars?: Record<string, string | number>) => string;
@@ -15,73 +14,16 @@ interface Props {
  onUsePrompt: (skill: SkillSummary) => void;
 }

-type SurfaceFilter = 'all' | Surface;
-type ModeFilter =
-  | 'all'
-  | 'prototype-desktop'
-  | 'prototype-mobile'
-  | 'deck'
-  | 'document'
-  | 'image'
-  | 'video'
-  | 'audio';
+type ModeFilter = 'all' | 'prototype-desktop' | 'prototype-mobile' | 'deck' | 'document';
 type ScenarioFilter = string;

-// Each surface gets its own type pills. We branch on `SURFACE_PILLS` so
-// the mode row reflects what makes sense within the active surface
-// (web has the most granularity; image / video / audio collapse to a
-// single mode pill so the pill count stays reasonable).
-const SURFACE_PILLS: { value: SurfaceFilter; labelKey: keyof Dict; icon: 'grid' | 'image' | 'video' | 'music' | null }[] = [
-  { value: 'all', labelKey: 'examples.modeAll', icon: null },
-  { value: 'web', labelKey: 'examples.surfaceWeb', icon: 'grid' },
-  { value: 'image', labelKey: 'examples.surfaceImage', icon: 'image' },
-  { value: 'video', labelKey: 'examples.surfaceVideo', icon: 'video' },
-  { value: 'audio', labelKey: 'examples.surfaceAudio', icon: 'music' },
-];
-
-const WEB_MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
+const MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
  { value: 'all', labelKey: 'examples.modeAll' },
  { value: 'prototype-desktop', labelKey: 'examples.modePrototypeDesktop' },
  { value: 'prototype-mobile', labelKey: 'examples.modePrototypeMobile' },
  { value: 'deck', labelKey: 'examples.modeDeck' },
  { value: 'document', labelKey: 'examples.modeDocument' },
 ];
-const IMAGE_MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
-  { value: 'all', labelKey: 'examples.modeAll' },
-  { value: 'image', labelKey: 'examples.modeImage' },
-];
-const VIDEO_MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
-  { value: 'all', labelKey: 'examples.modeAll' },
-  { value: 'video', labelKey: 'examples.modeVideo' },
-];
-const AUDIO_MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
-  { value: 'all', labelKey: 'examples.modeAll' },
-  { value: 'audio', labelKey: 'examples.modeAudio' },
-];
-
-// Convenience — the union pill list for the "All surfaces" view.
-const ALL_MODE_PILLS: { value: ModeFilter; labelKey: keyof Dict }[] = [
-  ...WEB_MODE_PILLS,
-  { value: 'image', labelKey: 'examples.modeImage' },
-  { value: 'video', labelKey: 'examples.modeVideo' },
-  { value: 'audio', labelKey: 'examples.modeAudio' },
-];
-
-function surfaceOf(skill: SkillSummary): Surface {
-  if (skill.surface) return skill.surface;
-  if (skill.mode === 'image') return 'image';
-  if (skill.mode === 'video') return 'video';
-  if (skill.mode === 'audio') return 'audio';
-  return 'web';
-}
-
-function pillsForSurface(surface: SurfaceFilter): { value: ModeFilter; labelKey: keyof Dict }[] {
-  if (surface === 'web') return WEB_MODE_PILLS;
-  if (surface === 'image') return IMAGE_MODE_PILLS;
-  if (surface === 'video') return VIDEO_MODE_PILLS;
-  if (surface === 'audio') return AUDIO_MODE_PILLS;
-  return ALL_MODE_PILLS;
-}

 const SCENARIO_LABEL_KEY: Record<string, keyof Dict> = {
  general: 'examples.scenarioGeneral',
@@ -129,22 +71,13 @@ function matchesMode(skill: SkillSummary, filter: ModeFilter): boolean {
  if (filter === 'prototype-mobile')
    return skill.mode === 'prototype' && skill.platform === 'mobile';
  if (filter === 'document') return skill.mode === 'template';
-  if (filter === 'image') return surfaceOf(skill) === 'image';
-  if (filter === 'video') return surfaceOf(skill) === 'video';
-  if (filter === 'audio') return surfaceOf(skill) === 'audio';
  return true;
 }

-function matchesSurface(skill: SkillSummary, filter: SurfaceFilter): boolean {
-  if (filter === 'all') return true;
-  return surfaceOf(skill) === filter;
-}
-
 export function ExamplesTab({ skills, onUsePrompt }: Props) {
  const t = useT();
  // Hold preview HTML per skill across re-renders so cards never re-flicker.
  const [previews, setPreviews] = useState<Record<string, string | null>>({});
-  const [surfaceFilter, setSurfaceFilter] = useState<SurfaceFilter>('all');
  const [modeFilter, setModeFilter] = useState<ModeFilter>('all');
  const [scenarioFilter, setScenarioFilter] = useState<ScenarioFilter>('all');
  const [previewSkillId, setPreviewSkillId] = useState<string | null>(null);
@@ -173,46 +106,32 @@ export function ExamplesTab({ skills, onUsePrompt }: Props) {
    [skills, previewSkillId],
  );

-  const surfaceCounts = useMemo(() => {
-    const counts: Record<SurfaceFilter, number> = {
+  const modeCounts = useMemo(() => {
+    const c: Record<ModeFilter, number> = {
      all: skills.length,
-      web: 0,
-      image: 0,
-      video: 0,
-      audio: 0,
+      'prototype-desktop': 0,
+      'prototype-mobile': 0,
+      deck: 0,
+      document: 0,
    };
    for (const s of skills) {
-      const sf = surfaceOf(s);
-      counts[sf] = (counts[sf] ?? 0) + 1;
-    }
-    return counts;
-  }, [skills]);
-
-  const surfaceScopedSkills = useMemo(
-    () => skills.filter((s) => matchesSurface(s, surfaceFilter)),
-    [skills, surfaceFilter],
-  );
-
-  const modePills = useMemo(() => pillsForSurface(surfaceFilter), [surfaceFilter]);
-
-  const modeCounts = useMemo(() => {
-    const c: Record<string, number> = { all: surfaceScopedSkills.length };
-    for (const p of modePills) {
-      if (p.value === 'all') continue;
-      c[p.value] = surfaceScopedSkills.filter((s) => matchesMode(s, p.value)).length;
+      if (matchesMode(s, 'prototype-desktop')) c['prototype-desktop']++;
+      if (matchesMode(s, 'prototype-mobile')) c['prototype-mobile']++;
+      if (matchesMode(s, 'deck')) c.deck++;
+      if (matchesMode(s, 'document')) c.document++;
    }
    return c;
-  }, [surfaceScopedSkills, modePills]);
+  }, [skills]);

  const scenarioCounts = useMemo(() => {
    const counts = new Map<string, number>();
-    for (const s of surfaceScopedSkills) {
+    for (const s of skills) {
      if (!matchesMode(s, modeFilter)) continue;
      const tag = s.scenario || 'general';
      counts.set(tag, (counts.get(tag) ?? 0) + 1);
    }
    return counts;
-  }, [surfaceScopedSkills, modeFilter]);
+  }, [skills, modeFilter]);

  const scenarioOptions = useMemo(() => {
    const have = new Set(scenarioCounts.keys());
@@ -223,7 +142,7 @@ export function ExamplesTab({ skills, onUsePrompt }: Props) {
  }, [scenarioCounts]);

  const filtered = useMemo(() => {
-    const matched = surfaceScopedSkills.filter((s) => {
+    const matched = skills.filter((s) => {
      if (!matchesMode(s, modeFilter)) return false;
      if (scenarioFilter === 'all') return true;
      return (s.scenario || 'general') === scenarioFilter;
@@ -240,7 +159,7 @@ export function ExamplesTab({ skills, onUsePrompt }: Props) {
        return a.idx - b.idx;
      })
      .map(({ s }) => s);
-  }, [surfaceScopedSkills, modeFilter, scenarioFilter]);
+  }, [skills, modeFilter, scenarioFilter]);

  if (skills.length === 0) {
    return <div className="tab-empty">{t('examples.emptyNoSkills')}</div>;
@@ -249,38 +168,13 @@ export function ExamplesTab({ skills, onUsePrompt }: Props) {
  return (
    <div className="tab-panel examples-panel">
      <div className="examples-toolbar">
-        <div
-          className="examples-filter-row"
-          role="tablist"
-          aria-label={t('examples.surfaceLabel')}
-        >
-          <span className="examples-filter-label">{t('examples.surfaceLabel')}</span>
-          {SURFACE_PILLS.map((p) => (
-            <button
-              key={p.value}
-              type="button"
-              role="tab"
-              aria-selected={surfaceFilter === p.value}
-              className={`filter-pill ${surfaceFilter === p.value ? 'active' : ''}`}
-              onClick={() => {
-                setSurfaceFilter(p.value);
-                setModeFilter('all');
-                setScenarioFilter('all');
-              }}
-            >
-              {p.icon ? <Icon name={p.icon} size={12} /> : null}
-              {t(p.labelKey)}
-              <span className="filter-pill-count">{surfaceCounts[p.value]}</span>
-            </button>
-          ))}
-        </div>
        <div
          className="examples-filter-row"
          role="tablist"
          aria-label={t('examples.typeLabel')}
        >
          <span className="examples-filter-label">{t('examples.typeLabel')}</span>
-          {modePills.map((p) => (
+          {MODE_PILLS.map((p) => (
            <button
              key={p.value}
              type="button"
@@ -293,9 +187,7 @@ export function ExamplesTab({ skills, onUsePrompt }: Props) {
              }}
            >
              {t(p.labelKey)}
-              <span className="filter-pill-count">
-                {p.value === 'all' ? surfaceScopedSkills.length : (modeCounts[p.value] ?? 0)}
-              </span>
+              <span className="filter-pill-count">{modeCounts[p.value]}</span>
            </button>
          ))}
        </div>
@@ -553,9 +445,6 @@ function ExampleCard({
 }

 function tagForSkill(skill: SkillSummary, t: TranslateFn): string {
-  if (skill.mode === 'image') return t('examples.tagImage');
-  if (skill.mode === 'video') return t('examples.tagVideo');
-  if (skill.mode === 'audio') return t('examples.tagAudio');
  if (skill.mode === 'deck') return t('examples.tagSlideDeck');
  if (skill.mode === 'template') return t('examples.tagTemplate');
  if (skill.mode === 'design-system') return t('examples.tagDesignSystem');
@@ -42,12 +42,6 @@ export function FileViewer({
  if (file.kind === 'sketch') {
    return <ImageViewer projectId={projectId} file={file} />;
  }
-  if (file.kind === 'video') {
-    return <VideoViewer projectId={projectId} file={file} />;
-  }
-  if (file.kind === 'audio') {
-    return <AudioViewer projectId={projectId} file={file} />;
-  }
  if (file.kind === 'text' || file.kind === 'code') {
    return <TextViewer projectId={projectId} file={file} />;
  }
@@ -685,95 +679,6 @@ function ImageViewer({
  );
 }

-function VideoViewer({
-  projectId,
-  file,
-}: {
-  projectId: string;
-  file: ProjectFile;
-}) {
-  const t = useT();
-  // Bust the browser cache when the agent regenerates the file in place.
-  const url = `${projectFileUrl(projectId, file.name)}?v=${Math.round(file.mtime)}`;
-  return (
-    <div className="viewer video-viewer">
-      <div className="viewer-toolbar">
-        <div className="viewer-toolbar-left">
-          <span className="viewer-meta">
-            {t('fileViewer.videoMeta', { size: humanSize(file.size) })}
-          </span>
-        </div>
-        <div className="viewer-toolbar-actions">
-          <a
-            className="ghost-link"
-            href={projectFileUrl(projectId, file.name)}
-            download={file.name}
-          >
-            {t('fileViewer.download')}
-          </a>
-          <a
-            className="ghost-link"
-            href={projectFileUrl(projectId, file.name)}
-            target="_blank"
-            rel="noreferrer noopener"
-          >
-            {t('fileViewer.open')}
-          </a>
-        </div>
-      </div>
-      <div className="viewer-body video-body">
-        <video src={url} controls preload="metadata" />
-      </div>
-    </div>
-  );
-}
-
-function AudioViewer({
-  projectId,
-  file,
-}: {
-  projectId: string;
-  file: ProjectFile;
-}) {
-  const t = useT();
-  const url = `${projectFileUrl(projectId, file.name)}?v=${Math.round(file.mtime)}`;
-  return (
-    <div className="viewer audio-viewer">
-      <div className="viewer-toolbar">
-        <div className="viewer-toolbar-left">
-          <span className="viewer-meta">
-            {t('fileViewer.audioMeta', { size: humanSize(file.size) })}
-          </span>
-        </div>
-        <div className="viewer-toolbar-actions">
-          <a
-            className="ghost-link"
-            href={projectFileUrl(projectId, file.name)}
-            download={file.name}
-          >
-            {t('fileViewer.download')}
-          </a>
-          <a
-            className="ghost-link"
-            href={projectFileUrl(projectId, file.name)}
-            target="_blank"
-            rel="noreferrer noopener"
-          >
-            {t('fileViewer.open')}
-          </a>
-        </div>
-      </div>
-      <div className="viewer-body audio-body">
-        <div className="audio-card">
-          <Icon name="music" size={28} />
-          <div className="audio-card-name">{file.name}</div>
-          <audio src={url} controls preload="metadata" />
-        </div>
-      </div>
-    </div>
-  );
-}
-
 function TextViewer({
  projectId,
  file,
@@ -397,7 +397,7 @@ function Tab({
  onActivate: () => void;
  onClose?: () => void;
  closable?: boolean;
-  kind?: 'html' | 'image' | 'video' | 'audio' | 'sketch' | 'text' | 'code' | 'binary';
+  kind?: 'html' | 'image' | 'sketch' | 'text' | 'code' | 'binary';
 }) {
  const t = useT();
  const iconName = kindIconName(kind);
@@ -439,13 +439,9 @@ function kindIconName(
  | 'image'
  | 'pencil'
  | 'file'
-  | 'video'
-  | 'music'
  | null {
  if (kind === 'html') return 'file-code';
  if (kind === 'image') return 'image';
-  if (kind === 'video') return 'video';
-  if (kind === 'audio') return 'music';
  if (kind === 'sketch') return 'pencil';
  if (kind === 'code') return 'file-code';
  if (kind === 'text') return 'file';
@@ -24,8 +24,6 @@ type IconName =
  | 'link'
  | 'mic'
  | 'minus'
-  | 'music'
-  | 'video'
  | 'pencil'
  | 'plus'
  | 'play'
@@ -234,21 +232,6 @@ export function Icon({ name, size = 14, strokeWidth = 1.6, ...rest }: Props) {
          <path d="M5 12h14" />
        </svg>
      );
-    case 'music':
-      return (
-        <svg {...common}>
-          <path d="M9 18V5l12-2v13" />
-          <circle cx="6" cy="18" r="3" />
-          <circle cx="18" cy="16" r="3" />
-        </svg>
-      );
-    case 'video':
-      return (
-        <svg {...common}>
-          <rect x="2" y="6" width="14" height="12" rx="2" />
-          <path d="m16 10 6-3v10l-6-3z" />
-        </svg>
-      );
    case 'pencil':
      return (
        <svg {...common}>
@@ -1,31 +1,18 @@
 import { useEffect, useMemo, useRef, useState } from 'react';
 import { useT } from '../i18n';
 import type { Dict } from '../i18n/types';
-import {
-  AUDIO_MODELS_BY_KIND,
-  DEFAULT_AUDIO_MODEL,
-  DEFAULT_IMAGE_MODEL,
-  DEFAULT_VIDEO_MODEL,
-  IMAGE_MODELS,
-  VIDEO_MODELS,
-} from '../media/models';
 import type {
-  AudioKind,
  DesignSystemSummary,
-  MediaAspect,
  ProjectKind,
  ProjectMetadata,
  ProjectTemplate,
  SkillSummary,
-  Surface,
 } from '../types';
 import { Icon } from './Icon';
 import { Skeleton } from './Loading';

 type TranslateFn = (key: keyof Dict, vars?: Record<string, string | number>) => string;

-// Tabs that live INSIDE the Web surface. Image / Video / Audio surfaces
-// don't expose a tab row — they each have a single, dedicated form.
 export type CreateTab = 'prototype' | 'deck' | 'template' | 'other';

 export interface CreateInput {
@@ -51,33 +38,6 @@ const TAB_LABEL_KEYS: Record<CreateTab, keyof Dict> = {
  other: 'newproj.tabOther',
 };

-// Per-surface model lists are maintained in src/media/models.ts (and
-// daemon/media-models.js for the dispatcher). Both the picker below and
-// the agent's `od media generate --model …` invocation read the same
-// registry so the metadata captured here is what the daemon dispatches.
-
-// Surface vocab shared by the surface picker and the create-flow.
-const SURFACES: Surface[] = ['web', 'image', 'video', 'audio'];
-
-const SURFACE_LABEL_KEY: Record<Surface, keyof Dict> = {
-  web: 'newproj.surfaceWeb',
-  image: 'newproj.surfaceImage',
-  video: 'newproj.surfaceVideo',
-  audio: 'newproj.surfaceAudio',
-};
-const SURFACE_HINT_KEY: Record<Surface, keyof Dict> = {
-  web: 'newproj.surfaceWebHint',
-  image: 'newproj.surfaceImageHint',
-  video: 'newproj.surfaceVideoHint',
-  audio: 'newproj.surfaceAudioHint',
-};
-const SURFACE_ICON: Record<Surface, 'grid' | 'image' | 'video' | 'music'> = {
-  web: 'grid',
-  image: 'image',
-  video: 'video',
-  audio: 'music',
-};
-
 export function NewProjectPanel({
  skills,
  designSystems,
@@ -87,10 +47,6 @@ export function NewProjectPanel({
  loading = false,
 }: Props) {
  const t = useT();
-  // Top-level surface — controls which sub-form renders below. We keep
-  // it separate from the Web tab state so users can flip between
-  // surfaces without losing their per-surface choices.
-  const [surface, setSurface] = useState<Surface>('web');
  const [tab, setTab] = useState<CreateTab>('prototype');
  const [name, setName] = useState('');
  // Design-system selection is now an *array* internally so the same
@@ -108,32 +64,12 @@ export function NewProjectPanel({
  const [animations, setAnimations] = useState(false);
  const [templateId, setTemplateId] = useState<string | null>(null);

-  // Image / Video / Audio metadata. Kept independently so flipping
-  // surfaces preserves each surface's last pick instead of resetting.
-  const [imageModel, setImageModel] = useState<string>(DEFAULT_IMAGE_MODEL);
-  const [imageAspect, setImageAspect] = useState<MediaAspect>('1:1');
-  const [imageStyle, setImageStyle] = useState('');
-  const [videoModel, setVideoModel] = useState<string>(DEFAULT_VIDEO_MODEL);
-  const [videoLength, setVideoLength] = useState<number>(5);
-  const [videoAspect, setVideoAspect] = useState<MediaAspect>('16:9');
-  const [audioKind, setAudioKind] = useState<AudioKind>('music');
-  const [audioModel, setAudioModel] = useState<string>(DEFAULT_AUDIO_MODEL.music);
-  const [audioDuration, setAudioDuration] = useState<number>(30);
-  const [voice, setVoice] = useState('');
-
-  // When the audio kind flips, reset the model to that kind's default.
-  // This keeps users from accidentally creating a "music" project that
-  // has `audioModel: minimax-tts` because they last visited speech.
-  useEffect(() => {
-    setAudioModel(DEFAULT_AUDIO_MODEL[audioKind]);
-  }, [audioKind]);
-
  // When entering the template tab, snap to the first user-saved template
  // if there is one (and we don't already have a valid pick). The template
  // tab no longer offers a built-in fallback — the entire point is to
  // start from a template *the user* created via Share.
  useEffect(() => {
-    if (surface !== 'web' || tab !== 'template') return;
+    if (tab !== 'template') return;
    if (templates.length === 0) {
      setTemplateId(null);
      return;
@@ -141,24 +77,12 @@ export function NewProjectPanel({
    if (templateId == null || !templates.some((t) => t.id === templateId)) {
      setTemplateId(templates[0]!.id);
    }
-  }, [surface, tab, templates, templateId]);
+  }, [tab, templates, templateId]);

  // The skill the request still routes through — kept so prototype/deck
  // pick a default-rendered skill (so the agent gets the right SKILL.md
-  // body) without requiring the user to choose one explicitly. For
-  // image / video / audio surfaces we look up a skill that targets that
-  // surface; if none ships yet the request still flies (skill_id null),
-  // and the agent falls back to its base behavior + project metadata.
+  // body) without requiring the user to choose one explicitly.
  const skillIdForTab = useMemo(() => {
-    if (surface === 'image') {
-      return pickDefaultSkill(skills, 'image');
-    }
-    if (surface === 'video') {
-      return pickDefaultSkill(skills, 'video');
-    }
-    if (surface === 'audio') {
-      return pickDefaultSkill(skills, 'audio');
-    }
    if (tab === 'other') return null;
    if (tab === 'prototype') {
      const list = skills.filter((s) => s.mode === 'prototype');
@@ -173,18 +97,16 @@ export function NewProjectPanel({
        ?? null;
    }
    return null;
-  }, [surface, tab, skills]);
+  }, [tab, skills]);

-  const canCreate = !loading && (
-    surface !== 'web' || tab !== 'template' || templateId != null
-  );
+  const canCreate =
+    !loading && (tab !== 'template' || templateId != null);

  function handleCreate() {
    if (!canCreate) return;
    const primaryDs = selectedDsIds[0] ?? null;
    const inspirations = selectedDsIds.slice(1);
    const metadata = buildMetadata({
-      surface,
      tab,
      fidelity,
      speakerNotes,
@@ -192,58 +114,32 @@ export function NewProjectPanel({
      templateId,
      templates,
      inspirationIds: inspirations,
-      imageModel,
-      imageAspect,
-      imageStyle,
-      videoModel,
-      videoLength,
-      videoAspect,
-      audioKind,
-      audioModel,
-      audioDuration,
-      voice,
    });
-    const fallbackName = surface === 'web'
-      ? autoName(tab, t)
-      : autoNameForSurface(surface, t);
    onCreate({
-      name: name.trim() || fallbackName,
+      name: name.trim() || autoName(tab, t),
      skillId: skillIdForTab,
      designSystemId: primaryDs,
      metadata,
    });
  }

-  // Web surface needs a design-system picker; the media surfaces
-  // currently don't bind tokens to a system so we hide it to reduce
-  // noise. (When image/video DS surfaces ship, this will swap to a
-  // surface-filtered picker variant.)
-  const showDesignSystemPicker = surface === 'web';
-
-  // Web surface still uses the four sub-tabs; the media surfaces
-  // skip the row entirely because each has a single dedicated form.
-  const showWebTabs = surface === 'web';
-
  return (
    <div className="newproj">
-      <SurfacePicker value={surface} onChange={setSurface} />
-      {showWebTabs ? (
-        <div className="newproj-tabs" role="tablist">
-          {(Object.keys(TAB_LABEL_KEYS) as CreateTab[]).map((entry) => (
-            <button
-              key={entry}
-              role="tab"
-              aria-selected={tab === entry}
-              className={`newproj-tab ${tab === entry ? 'active' : ''}`}
-              onClick={() => setTab(entry)}
-            >
-              {t(TAB_LABEL_KEYS[entry])}
-            </button>
-          ))}
-        </div>
-      ) : null}
+      <div className="newproj-tabs" role="tablist">
+        {(Object.keys(TAB_LABEL_KEYS) as CreateTab[]).map((entry) => (
+          <button
+            key={entry}
+            role="tab"
+            aria-selected={tab === entry}
+            className={`newproj-tab ${tab === entry ? 'active' : ''}`}
+            onClick={() => setTab(entry)}
+          >
+            {t(TAB_LABEL_KEYS[entry])}
+          </button>
+        ))}
+      </div>
      <div className="newproj-body">
-        <h3 className="newproj-title">{titleForView(surface, tab, t)}</h3>
+        <h3 className="newproj-title">{titleForTab(tab, t)}</h3>

        <input
          className="newproj-name"
@@ -252,23 +148,21 @@ export function NewProjectPanel({
          onChange={(e) => setName(e.target.value)}
        />

-        {showDesignSystemPicker ? (
-          <DesignSystemPicker
-            designSystems={designSystems}
-            defaultDesignSystemId={defaultDesignSystemId}
-            selectedIds={selectedDsIds}
-            multi={dsMulti}
-            onChangeMulti={setDsMulti}
-            onChange={setSelectedDsIds}
-            loading={loading}
-          />
-        ) : null}
+        <DesignSystemPicker
+          designSystems={designSystems}
+          defaultDesignSystemId={defaultDesignSystemId}
+          selectedIds={selectedDsIds}
+          multi={dsMulti}
+          onChangeMulti={setDsMulti}
+          onChange={setSelectedDsIds}
+          loading={loading}
+        />

-        {surface === 'web' && tab === 'prototype' ? (
+        {tab === 'prototype' ? (
          <FidelityPicker value={fidelity} onChange={setFidelity} />
        ) : null}

-        {surface === 'web' && tab === 'deck' ? (
+        {tab === 'deck' ? (
          <ToggleRow
            label={t('newproj.toggleSpeakerNotes')}
            hint={t('newproj.toggleSpeakerNotesHint')}
@@ -277,7 +171,7 @@ export function NewProjectPanel({
          />
        ) : null}

-        {surface === 'web' && tab === 'template' ? (
+        {tab === 'template' ? (
          <>
            <TemplatePicker
              templates={templates}
@@ -293,54 +187,19 @@ export function NewProjectPanel({
          </>
        ) : null}

-        {surface === 'image' ? (
-          <ImageForm
-            model={imageModel}
-            onChangeModel={setImageModel}
-            aspect={imageAspect}
-            onChangeAspect={setImageAspect}
-            style={imageStyle}
-            onChangeStyle={setImageStyle}
-          />
-        ) : null}
-
-        {surface === 'video' ? (
-          <VideoForm
-            model={videoModel}
-            onChangeModel={setVideoModel}
-            length={videoLength}
-            onChangeLength={setVideoLength}
-            aspect={videoAspect}
-            onChangeAspect={setVideoAspect}
-          />
-        ) : null}
-
-        {surface === 'audio' ? (
-          <AudioForm
-            kind={audioKind}
-            onChangeKind={setAudioKind}
-            model={audioModel}
-            onChangeModel={setAudioModel}
-            duration={audioDuration}
-            onChangeDuration={setAudioDuration}
-            voice={voice}
-            onChangeVoice={setVoice}
-          />
-        ) : null}
-
        <button
          className="primary newproj-create"
          onClick={handleCreate}
          disabled={!canCreate}
          title={
-            surface === 'web' && tab === 'template' && templateId == null
+            tab === 'template' && templateId == null
              ? t('newproj.createDisabledTitle')
              : undefined
          }
        >
          <Icon name="plus" size={13} />
          <span>
-            {surface === 'web' && tab === 'template'
+            {tab === 'template'
              ? t('newproj.createFromTemplate')
              : t('newproj.create')}
          </span>
@@ -351,290 +210,6 @@ export function NewProjectPanel({
  );
 }

-function pickDefaultSkill(
-  skills: SkillSummary[],
-  surface: Surface,
-): string | null {
-  // Prefer a skill that explicitly declares `od.surface: <surface>` AND
-  // matches the corresponding mode. Fall back to mode-only match so even
-  // legacy skills authored without `surface` still get picked up.
-  const surfaceMatch = skills.find(
-    (s) => s.surface === surface && s.mode === surface,
-  );
-  if (surfaceMatch) return surfaceMatch.id;
-  const modeMatch = skills.find((s) => s.mode === surface);
-  if (modeMatch) return modeMatch.id;
-  return null;
-}
-
-function SurfacePicker({
-  value,
-  onChange,
-}: {
-  value: Surface;
-  onChange: (s: Surface) => void;
-}) {
-  const t = useT();
-  return (
-    <div className="newproj-surfaces" role="tablist" aria-label={t('newproj.surfaceLabel')}>
-      {SURFACES.map((s) => (
-        <button
-          key={s}
-          type="button"
-          role="tab"
-          aria-selected={value === s}
-          className={`newproj-surface${value === s ? ' active' : ''}`}
-          onClick={() => onChange(s)}
-        >
-          <Icon name={SURFACE_ICON[s]} size={15} />
-          <span className="newproj-surface-label">{t(SURFACE_LABEL_KEY[s])}</span>
-          <span className="newproj-surface-hint">{t(SURFACE_HINT_KEY[s])}</span>
-        </button>
-      ))}
-    </div>
-  );
-}
-
-function ImageForm({
-  model,
-  onChangeModel,
-  aspect,
-  onChangeAspect,
-  style,
-  onChangeStyle,
-}: {
-  model: string;
-  onChangeModel: (id: string) => void;
-  aspect: MediaAspect;
-  onChangeAspect: (a: MediaAspect) => void;
-  style: string;
-  onChangeStyle: (s: string) => void;
-}) {
-  const t = useT();
-  return (
-    <>
-      <ModelPicker
-        value={model}
-        onChange={onChangeModel}
-        options={IMAGE_MODELS}
-      />
-      <AspectPicker
-        value={aspect}
-        onChange={onChangeAspect}
-        options={['1:1', '16:9', '9:16', '4:3', '3:4']}
-      />
-      <div className="newproj-section">
-        <label className="newproj-label">{t('newproj.imageStyleLabel')}</label>
-        <textarea
-          className="newproj-textarea"
-          rows={3}
-          placeholder={t('newproj.imageStylePlaceholder')}
-          value={style}
-          onChange={(e) => onChangeStyle(e.target.value)}
-        />
-      </div>
-    </>
-  );
-}
-
-function VideoForm({
-  model,
-  onChangeModel,
-  length,
-  onChangeLength,
-  aspect,
-  onChangeAspect,
-}: {
-  model: string;
-  onChangeModel: (id: string) => void;
-  length: number;
-  onChangeLength: (n: number) => void;
-  aspect: MediaAspect;
-  onChangeAspect: (a: MediaAspect) => void;
-}) {
-  const t = useT();
-  const lengths = [3, 5, 10];
-  return (
-    <>
-      <ModelPicker value={model} onChange={onChangeModel} options={VIDEO_MODELS} />
-      <div className="newproj-section">
-        <label className="newproj-label">{t('newproj.videoLengthLabel')}</label>
-        <div className="pill-grid">
-          {lengths.map((s) => (
-            <button
-              key={s}
-              type="button"
-              className={`pill-grid-btn${length === s ? ' active' : ''}`}
-              onClick={() => onChangeLength(s)}
-              aria-pressed={length === s}
-            >
-              {t('newproj.videoLengthSeconds', { n: s })}
-            </button>
-          ))}
-        </div>
-      </div>
-      <AspectPicker
-        value={aspect}
-        onChange={onChangeAspect}
-        options={['16:9', '9:16', '1:1']}
-      />
-    </>
-  );
-}
-
-function AudioForm({
-  kind,
-  onChangeKind,
-  model,
-  onChangeModel,
-  duration,
-  onChangeDuration,
-  voice,
-  onChangeVoice,
-}: {
-  kind: AudioKind;
-  onChangeKind: (k: AudioKind) => void;
-  model: string;
-  onChangeModel: (id: string) => void;
-  duration: number;
-  onChangeDuration: (n: number) => void;
-  voice: string;
-  onChangeVoice: (v: string) => void;
-}) {
-  const t = useT();
-  const kinds: { id: AudioKind; labelKey: keyof Dict }[] = [
-    { id: 'music', labelKey: 'newproj.audioKindMusic' },
-    { id: 'speech', labelKey: 'newproj.audioKindSpeech' },
-    { id: 'sfx', labelKey: 'newproj.audioKindSfx' },
-  ];
-  // Music tracks are usually 30s-2min; speech / sfx work in shorter
-  // chunks. We expose three buckets per kind so users don't have to
-  // free-form-input a number.
-  const durations = kind === 'music' ? [30, 60, 120] : [10, 30, 60];
-  return (
-    <>
-      <div className="newproj-section">
-        <label className="newproj-label">{t('newproj.audioKindLabel')}</label>
-        <div className="pill-grid">
-          {kinds.map((k) => (
-            <button
-              key={k.id}
-              type="button"
-              className={`pill-grid-btn${kind === k.id ? ' active' : ''}`}
-              onClick={() => onChangeKind(k.id)}
-              aria-pressed={kind === k.id}
-            >
-              {t(k.labelKey)}
-            </button>
-          ))}
-        </div>
-      </div>
-      <ModelPicker
-        value={model}
-        onChange={onChangeModel}
-        options={AUDIO_MODELS_BY_KIND[kind]}
-      />
-      <div className="newproj-section">
-        <label className="newproj-label">{t('newproj.audioDurationLabel')}</label>
-        <div className="pill-grid">
-          {durations.map((s) => (
-            <button
-              key={s}
-              type="button"
-              className={`pill-grid-btn${duration === s ? ' active' : ''}`}
-              onClick={() => onChangeDuration(s)}
-              aria-pressed={duration === s}
-            >
-              {t('newproj.audioDurationSeconds', { n: s })}
-            </button>
-          ))}
-        </div>
-      </div>
-      {kind === 'speech' ? (
-        <div className="newproj-section">
-          <label className="newproj-label">{t('newproj.voiceLabel')}</label>
-          <textarea
-            className="newproj-textarea"
-            rows={2}
-            placeholder={t('newproj.voicePlaceholder')}
-            value={voice}
-            onChange={(e) => onChangeVoice(e.target.value)}
-          />
-        </div>
-      ) : null}
-    </>
-  );
-}
-
-function ModelPicker({
-  value,
-  onChange,
-  options,
-}: {
-  value: string;
-  onChange: (id: string) => void;
-  options: { id: string; label: string; hint: string }[];
-}) {
-  const t = useT();
-  return (
-    <div className="newproj-section">
-      <label className="newproj-label">{t('newproj.modelLabel')}</label>
-      <div className="model-grid">
-        {options.map((o) => (
-          <button
-            key={o.id}
-            type="button"
-            className={`model-card${value === o.id ? ' active' : ''}`}
-            onClick={() => onChange(o.id)}
-            aria-pressed={value === o.id}
-          >
-            <span className="model-card-name">{o.label}</span>
-            <span className="model-card-hint">{o.hint}</span>
-          </button>
-        ))}
-      </div>
-    </div>
-  );
-}
-
-function AspectPicker({
-  value,
-  onChange,
-  options,
-}: {
-  value: MediaAspect;
-  onChange: (a: MediaAspect) => void;
-  options: MediaAspect[];
-}) {
-  const t = useT();
-  const labelKeyFor: Record<MediaAspect, keyof Dict> = {
-    '1:1': 'newproj.aspectSquare',
-    '16:9': 'newproj.aspectLandscape',
-    '9:16': 'newproj.aspectPortrait',
-    '4:3': 'newproj.aspect43',
-    '3:4': 'newproj.aspect34',
-  };
-  return (
-    <div className="newproj-section">
-      <label className="newproj-label">{t('newproj.aspectLabel')}</label>
-      <div className="aspect-grid">
-        {options.map((a) => (
-          <button
-            key={a}
-            type="button"
-            className={`aspect-card${value === a ? ' active' : ''}`}
-            onClick={() => onChange(a)}
-            aria-pressed={value === a}
-          >
-            <span className={`aspect-thumb aspect-thumb-${a.replace(':', 'x')}`} aria-hidden />
-            <span className="aspect-label">{t(labelKeyFor[a])}</span>
-          </button>
-        ))}
-      </div>
-    </div>
-  );
-}
-
 function FidelityPicker({
  value,
  onChange,
@@ -1189,7 +764,6 @@ function fallbackSwatches(seed: string): string[] {
 }

 function buildMetadata(input: {
-  surface: Surface;
  tab: CreateTab;
  fidelity: 'wireframe' | 'high-fidelity';
  speakerNotes: boolean;
@@ -1197,54 +771,11 @@ function buildMetadata(input: {
  templateId: string | null;
  templates: ProjectTemplate[];
  inspirationIds: string[];
-  imageModel: string;
-  imageAspect: MediaAspect;
-  imageStyle: string;
-  videoModel: string;
-  videoLength: number;
-  videoAspect: MediaAspect;
-  audioKind: AudioKind;
-  audioModel: string;
-  audioDuration: number;
-  voice: string;
 }): ProjectMetadata {
+  const kind: ProjectKind = input.tab;
  const inspirations = input.inspirationIds.length > 0
    ? { inspirationDesignSystemIds: input.inspirationIds }
    : {};
-
-  if (input.surface === 'image') {
-    return {
-      kind: 'image',
-      imageModel: input.imageModel,
-      imageAspect: input.imageAspect,
-      imageStyle: input.imageStyle.trim() || undefined,
-      ...inspirations,
-    };
-  }
-  if (input.surface === 'video') {
-    return {
-      kind: 'video',
-      videoModel: input.videoModel,
-      videoLength: input.videoLength,
-      videoAspect: input.videoAspect,
-      ...inspirations,
-    };
-  }
-  if (input.surface === 'audio') {
-    return {
-      kind: 'audio',
-      audioKind: input.audioKind,
-      audioModel: input.audioModel,
-      audioDuration: input.audioDuration,
-      voice:
-        input.audioKind === 'speech' && input.voice.trim()
-          ? input.voice.trim()
-          : undefined,
-      ...inspirations,
-    };
-  }
-
-  const kind: ProjectKind = input.tab;
  if (input.tab === 'prototype') {
    return { kind, fidelity: input.fidelity, ...inspirations };
  }
@@ -1269,10 +800,7 @@ function buildMetadata(input: {
  return { kind: 'other', ...inspirations };
 }

-function titleForView(surface: Surface, tab: CreateTab, t: TranslateFn): string {
-  if (surface === 'image') return t('newproj.titleImage');
-  if (surface === 'video') return t('newproj.titleVideo');
-  if (surface === 'audio') return t('newproj.titleAudio');
+function titleForTab(tab: CreateTab, t: TranslateFn): string {
  switch (tab) {
    case 'prototype':
      return t('newproj.titlePrototype');
@@ -1289,8 +817,3 @@ function autoName(tab: CreateTab, t: TranslateFn): string {
  const stamp = new Date().toLocaleDateString();
  return `${t(TAB_LABEL_KEYS[tab])} · ${stamp}`;
 }
-
-function autoNameForSurface(surface: Surface, t: TranslateFn): string {
-  const stamp = new Date().toLocaleDateString();
-  return `${t(SURFACE_LABEL_KEY[surface])} · ${stamp}`;
-}
@@ -53,6 +53,10 @@ interface Props {
  daemonLive: boolean;
  onModeChange: (mode: AppConfig['mode']) => void;
  onAgentChange: (id: string) => void;
+  onAgentModelChange: (
+    id: string,
+    choice: { model?: string; reasoning?: string },
+  ) => void;
  onRefreshAgents: () => void;
  onOpenSettings: () => void;
  onBack: () => void;
@@ -72,6 +76,7 @@ export function ProjectView({
  daemonLive,
  onModeChange,
  onAgentChange,
+  onAgentModelChange,
  onRefreshAgents,
  onOpenSettings,
  onBack,
@@ -490,6 +495,7 @@ export function ProjectView({
          handlers.onError(new Error('Pick a local agent first (top bar).'));
          return;
        }
+        const choice = config.agentModels?.[config.agentId];
        void streamViaDaemon({
          agentId: config.agentId,
          history: nextHistory,
@@ -498,6 +504,8 @@ export function ProjectView({
          handlers,
          projectId: project.id,
          attachments: attachments.map((a) => a.path),
+          model: choice?.model ?? null,
+          reasoning: choice?.reasoning ?? null,
        });
      } else {
        pushEvent({ kind: 'status', label: 'requesting', detail: config.model });
@@ -728,6 +736,7 @@ export function ProjectView({
            daemonLive={daemonLive}
            onModeChange={onModeChange}
            onAgentChange={onAgentChange}
+            onAgentModelChange={onAgentModelChange}
            onOpenSettings={onOpenSettings}
            onRefreshAgents={onRefreshAgents}
            onBack={onBack}
@@ -2,6 +2,11 @@ import { useEffect, useMemo, useState } from 'react';
 import { LOCALE_LABEL, LOCALES, useI18n } from '../i18n';
 import type { Locale } from '../i18n';
 import { AgentIcon } from './AgentIcon';
+import {
+  CUSTOM_MODEL_SENTINEL,
+  isCustomModel,
+  renderModelOptions,
+} from './modelOptions';
 import type { AgentInfo, AppConfig, ExecMode } from '../types';

 interface Props {
@@ -183,6 +188,108 @@ export function SettingsDialog({
                })}
              </div>
            )}
+            {(() => {
+              const selected = agents.find(
+                (a) => a.id === cfg.agentId && a.available,
+              );
+              if (!selected) return null;
+              const hasModels =
+                Array.isArray(selected.models) && selected.models.length > 0;
+              const hasReasoning =
+                Array.isArray(selected.reasoningOptions) &&
+                selected.reasoningOptions.length > 0;
+              if (!hasModels && !hasReasoning) return null;
+              const choice = cfg.agentModels?.[selected.id] ?? {};
+              const setChoice = (
+                next: { model?: string; reasoning?: string },
+              ) => {
+                setCfg((c) => {
+                  const prev = c.agentModels?.[selected.id] ?? {};
+                  return {
+                    ...c,
+                    agentModels: {
+                      ...(c.agentModels ?? {}),
+                      [selected.id]: { ...prev, ...next },
+                    },
+                  };
+                });
+              };
+              const modelValue =
+                choice.model ?? selected.models?.[0]?.id ?? '';
+              const reasoningValue =
+                choice.reasoning ??
+                selected.reasoningOptions?.[0]?.id ?? '';
+              const customActive =
+                hasModels && isCustomModel(modelValue, selected.models!);
+              const selectValue = customActive
+                ? CUSTOM_MODEL_SENTINEL
+                : modelValue;
+              return (
+                <div className="agent-model-row">
+                  {hasModels ? (
+                    <label className="field">
+                      <span className="field-label">
+                        {t('settings.modelPicker')}
+                      </span>
+                      <select
+                        value={selectValue}
+                        onChange={(e) => {
+                          if (e.target.value === CUSTOM_MODEL_SENTINEL) {
+                            // Switching to "Custom…" should clear the
+                            // value so the input below opens empty for
+                            // typing — keeping the previous live id
+                            // would defeat the point.
+                            setChoice({ model: '' });
+                          } else {
+                            setChoice({ model: e.target.value });
+                          }
+                        }}
+                      >
+                        {renderModelOptions(selected.models!)}
+                        <option value={CUSTOM_MODEL_SENTINEL}>
+                          {t('settings.modelCustom')}
+                        </option>
+                      </select>
+                    </label>
+                  ) : null}
+                  {customActive ? (
+                    <label className="field">
+                      <span className="field-label">
+                        {t('settings.modelCustomLabel')}
+                      </span>
+                      <input
+                        type="text"
+                        value={modelValue}
+                        placeholder={t('settings.modelCustomPlaceholder')}
+                        onChange={(e) =>
+                          setChoice({ model: e.target.value.trim() })
+                        }
+                      />
+                    </label>
+                  ) : null}
+                  {hasReasoning ? (
+                    <label className="field">
+                      <span className="field-label">
+                        {t('settings.reasoningPicker')}
+                      </span>
+                      <select
+                        value={reasoningValue}
+                        onChange={(e) =>
+                          setChoice({ reasoning: e.target.value })
+                        }
+                      >
+                        {selected.reasoningOptions!.map((r) => (
+                          <option key={r.id} value={r.id}>
+                            {r.label}
+                          </option>
+                        ))}
+                      </select>
+                    </label>
+                  ) : null}
+                  <p className="hint">{t('settings.modelPickerHint')}</p>
+                </div>
+              );
+            })()}
          </section>
        ) : (
          <section className="settings-section">
@@ -0,0 +1,71 @@
+import type { AgentModelOption } from '../types';
+
+// Render the `<option>` children for a model `<select>`. When the list
+// contains `provider/model` ids (opencode's listing has hundreds), we
+// group them under `<optgroup>` so the dropdown is navigable. Flat lists
+// (Claude, Codex, Gemini, Qwen) are emitted as plain options.
+//
+// `'default'` is always pinned first (no group), so the user can return
+// to "let the CLI decide" with one click.
+export function renderModelOptions(models: AgentModelOption[]) {
+  const groups = new Map<string, AgentModelOption[]>();
+  const flat: AgentModelOption[] = [];
+  for (const m of models) {
+    const slash = m.id.indexOf('/');
+    if (m.id === 'default' || slash <= 0) {
+      flat.push(m);
+      continue;
+    }
+    const provider = m.id.slice(0, slash);
+    const arr = groups.get(provider) ?? [];
+    arr.push(m);
+    groups.set(provider, arr);
+  }
+  if (groups.size === 0) {
+    return (
+      <>
+        {flat.map((m) => (
+          <option key={m.id} value={m.id}>
+            {m.label}
+          </option>
+        ))}
+      </>
+    );
+  }
+  return (
+    <>
+      {flat.map((m) => (
+        <option key={m.id} value={m.id}>
+          {m.label}
+        </option>
+      ))}
+      {Array.from(groups.entries()).map(([provider, items]) => (
+        <optgroup key={provider} label={provider}>
+          {items.map((m) => (
+            <option key={m.id} value={m.id}>
+              {/* Strip the redundant `provider/` prefix from the label
+                  inside its own optgroup; keep it in the value so the
+                  CLI sees the fully-qualified id. */}
+              {m.label.startsWith(`${provider}/`)
+                ? m.label.slice(provider.length + 1)
+                : m.label}
+            </option>
+          ))}
+        </optgroup>
+      ))}
+    </>
+  );
+}
+
+// True when a model id has been chosen and it isn't one of the listed
+// options. `''` counts as custom: the "Custom…" entry stores it until the
+// user types, so rejecting it would hide the input. null/undefined = unset.
+export function isCustomModel(
+  modelId: string | null | undefined,
+  models: AgentModelOption[],
+): boolean {
+  if (modelId == null) return false;
+  return !models.some((m) => m.id === modelId);
+}
+
+export const CUSTOM_MODEL_SENTINEL = '__custom__';
@@ -83,6 +83,13 @@ export const en: Dict = {
  'settings.noAgentSelected': 'no agent selected',
  'settings.language': 'Language',
  'settings.languageHint': 'Switch the interface language. Saved to this browser.',
+  'settings.modelPicker': 'Model',
+  'settings.reasoningPicker': 'Reasoning effort',
+  'settings.modelPickerHint':
+    'Fetched from the CLI when it exposes a `models` command. "Default" leaves the choice to the CLI’s own config; "Custom…" lets you type any model id the CLI accepts.',
+  'settings.modelCustom': 'Custom (type below)…',
+  'settings.modelCustomLabel': 'Custom model id',
+  'settings.modelCustomPlaceholder': 'e.g. anthropic/claude-sonnet-4-6',

  'entry.tabDesigns': 'Designs',
  'entry.tabExamples': 'Examples',
@@ -92,16 +99,6 @@ export const en: Dict = {
  'entry.resizeAria': 'Resize sidebar',
  'entry.loadingWorkspace': 'Loading workspace…',

-  'newproj.surfaceLabel': 'Surface',
-  'newproj.surfaceWeb': 'Web',
-  'newproj.surfaceImage': 'Image',
-  'newproj.surfaceVideo': 'Video',
-  'newproj.surfaceAudio': 'Audio',
-  'newproj.surfaceWebHint': 'Prototypes, decks, docs',
-  'newproj.surfaceImageHint': 'Posters, illustrations, art',
-  'newproj.surfaceVideoHint': 'Short-form clips, motion',
-  'newproj.surfaceAudioHint': 'Music, voice, sfx',
-
  'newproj.tabPrototype': 'Prototype',
  'newproj.tabDeck': 'Slide deck',
  'newproj.tabTemplate': 'From template',
@@ -110,32 +107,6 @@ export const en: Dict = {
  'newproj.titleDeck': 'New slide deck',
  'newproj.titleTemplate': 'Start from a template',
  'newproj.titleOther': 'New project',
-  'newproj.titleImage': 'New image',
-  'newproj.titleVideo': 'New video',
-  'newproj.titleAudio': 'New audio',
-
-  'newproj.modelLabel': 'Model',
-  'newproj.modelHint': 'Pick the upstream provider the agent should call.',
-  'newproj.aspectLabel': 'Aspect ratio',
-  'newproj.aspectSquare': 'Square · 1:1',
-  'newproj.aspectLandscape': 'Landscape · 16:9',
-  'newproj.aspectPortrait': 'Portrait · 9:16',
-  'newproj.aspect43': 'Wide · 4:3',
-  'newproj.aspect34': 'Tall · 3:4',
-  'newproj.imageStyleLabel': 'Style notes (optional)',
-  'newproj.imageStylePlaceholder':
-    'e.g. editorial photography, muted earth tones, soft daylight',
-  'newproj.videoLengthLabel': 'Length',
-  'newproj.videoLengthSeconds': '{n}s',
-  'newproj.audioKindLabel': 'What are we making?',
-  'newproj.audioKindMusic': 'Music',
-  'newproj.audioKindSpeech': 'Voice / TTS',
-  'newproj.audioKindSfx': 'SFX / foley',
-  'newproj.audioDurationLabel': 'Duration',
-  'newproj.audioDurationSeconds': '{n}s',
-  'newproj.voiceLabel': 'Voice (TTS only)',
-  'newproj.voicePlaceholder':
-    'e.g. warm female narrator, British English, calm pacing',
  'newproj.namePlaceholder': 'Project name',
  'newproj.fidelityLabel': 'Fidelity',
  'newproj.fidelityWireframe': 'Wireframe',
@@ -192,17 +163,6 @@ export const en: Dict = {
  'examples.modePrototypeMobile': 'Prototypes · Mobile',
  'examples.modeDeck': 'Slides',
  'examples.modeDocument': 'Docs & templates',
-  'examples.modeImage': 'Images',
-  'examples.modeVideo': 'Videos',
-  'examples.modeAudio': 'Audio',
-  'examples.surfaceLabel': 'Surface',
-  'examples.surfaceWeb': 'Web',
-  'examples.surfaceImage': 'Image',
-  'examples.surfaceVideo': 'Video',
-  'examples.surfaceAudio': 'Audio',
-  'examples.tagImage': 'Image',
-  'examples.tagVideo': 'Video',
-  'examples.tagAudio': 'Audio',
  'examples.scenarioGeneral': 'General',
  'examples.scenarioEngineering': 'Engineering',
  'examples.scenarioProduct': 'Product',
@@ -244,11 +204,6 @@ export const en: Dict = {
  'ds.categoryUncategorized': 'Uncategorized',
  'ds.showcase': 'Showcase',
  'ds.tokens': 'Tokens',
-  'ds.surfaceLabel': 'Surface',
-  'ds.surfaceWeb': 'Web',
-  'ds.surfaceImage': 'Image',
-  'ds.surfaceVideo': 'Video',
-  'ds.surfaceAudio': 'Audio',

  'avatar.title': 'Account & settings',
  'avatar.localCli': 'Local CLI',
@@ -263,6 +218,10 @@ export const en: Dict = {
  'avatar.metaOffline': 'offline',
  'avatar.metaSelected': 'selected',
  'avatar.noAgentSelected': 'no agent selected',
+  'avatar.modelSection': 'Model',
+  'avatar.modelLabel': 'Model',
+  'avatar.reasoningLabel': 'Reasoning',
+  'avatar.customSuffix': '(custom)',

  'project.backToProjects': 'Back to projects',
  'project.metaFreeform': 'freeform',
@@ -403,8 +362,6 @@ export const en: Dict = {
  'fileViewer.open': 'Open',
  'fileViewer.imageMeta': 'Image · {size}',
  'fileViewer.sketchMeta': 'Sketch · {size}',
-  'fileViewer.videoMeta': 'Video · {size}',
-  'fileViewer.audioMeta': 'Audio · {size}',
  'fileViewer.reload': 'Reload',
  'fileViewer.reloadDisk': 'Reload from disk',
  'fileViewer.copy': 'Copy',
@@ -82,6 +82,13 @@ export const zhCN: Dict = {
  'settings.noAgentSelected': '尚未选择代理',
  'settings.language': '界面语言',
  'settings.languageHint': '切换界面语言，设置仅保存在当前浏览器。',
+  'settings.modelPicker': '模型',
+  'settings.reasoningPicker': '推理强度',
+  'settings.modelPickerHint':
+    '当 CLI 提供 `models` 命令时会自动拉取。选择「默认」则沿用 CLI 自身的配置；选择「自定义」可手动输入任何 CLI 支持的模型 id。',
+  'settings.modelCustom': '自定义（在下方填写）…',
+  'settings.modelCustomLabel': '自定义模型 id',
+  'settings.modelCustomPlaceholder': '例如 anthropic/claude-sonnet-4-6',

  'entry.tabDesigns': '我的设计',
  'entry.tabExamples': '示例',
@@ -91,16 +98,6 @@ export const zhCN: Dict = {
  'entry.resizeAria': '调整侧边栏宽度',
  'entry.loadingWorkspace': '正在加载工作区…',

-  'newproj.surfaceLabel': '类型',
-  'newproj.surfaceWeb': '网页',
-  'newproj.surfaceImage': '图片',
-  'newproj.surfaceVideo': '视频',
-  'newproj.surfaceAudio': '音频',
-  'newproj.surfaceWebHint': '原型 / 幻灯 / 文档',
-  'newproj.surfaceImageHint': '海报 / 插画 / 设计稿',
-  'newproj.surfaceVideoHint': '短视频 / 动效',
-  'newproj.surfaceAudioHint': '音乐 / 配音 / 音效',
-
  'newproj.tabPrototype': '原型',
  'newproj.tabDeck': '幻灯片',
  'newproj.tabTemplate': '从模板',
@@ -109,30 +106,6 @@ export const zhCN: Dict = {
  'newproj.titleDeck': '新建幻灯片',
  'newproj.titleTemplate': '从模板开始',
  'newproj.titleOther': '新建项目',
-  'newproj.titleImage': '新建图片',
-  'newproj.titleVideo': '新建视频',
-  'newproj.titleAudio': '新建音频',
-
-  'newproj.modelLabel': '模型',
-  'newproj.modelHint': '选择代理调用的上游模型。',
-  'newproj.aspectLabel': '画幅比例',
-  'newproj.aspectSquare': '方形 · 1:1',
-  'newproj.aspectLandscape': '横版 · 16:9',
-  'newproj.aspectPortrait': '竖版 · 9:16',
-  'newproj.aspect43': '宽屏 · 4:3',
-  'newproj.aspect34': '高屏 · 3:4',
-  'newproj.imageStyleLabel': '风格备注（可选）',
-  'newproj.imageStylePlaceholder': '例如：编辑摄影、低饱和大地色、柔光日光',
-  'newproj.videoLengthLabel': '时长',
-  'newproj.videoLengthSeconds': '{n}秒',
-  'newproj.audioKindLabel': '生成什么？',
-  'newproj.audioKindMusic': '音乐',
-  'newproj.audioKindSpeech': '配音 / TTS',
-  'newproj.audioKindSfx': '音效 / 拟音',
-  'newproj.audioDurationLabel': '时长',
-  'newproj.audioDurationSeconds': '{n}秒',
-  'newproj.voiceLabel': '声线（仅 TTS）',
-  'newproj.voicePlaceholder': '例如：温暖女声旁白，普通话，平稳语速',
  'newproj.namePlaceholder': '项目名称',
  'newproj.fidelityLabel': '精度',
  'newproj.fidelityWireframe': '线框图',
@@ -187,17 +160,6 @@ export const zhCN: Dict = {
  'examples.modePrototypeMobile': '原型 · 移动端',
  'examples.modeDeck': '幻灯片',
  'examples.modeDocument': '文档与模板',
-  'examples.modeImage': '图片',
-  'examples.modeVideo': '视频',
-  'examples.modeAudio': '音频',
-  'examples.surfaceLabel': '类型',
-  'examples.surfaceWeb': '网页',
-  'examples.surfaceImage': '图片',
-  'examples.surfaceVideo': '视频',
-  'examples.surfaceAudio': '音频',
-  'examples.tagImage': '图片',
-  'examples.tagVideo': '视频',
-  'examples.tagAudio': '音频',
  'examples.scenarioGeneral': '通用',
  'examples.scenarioEngineering': '工程',
  'examples.scenarioProduct': '产品',
@@ -239,11 +201,6 @@ export const zhCN: Dict = {
  'ds.categoryUncategorized': '未分类',
  'ds.showcase': '展示',
  'ds.tokens': 'Token',
-  'ds.surfaceLabel': '类型',
-  'ds.surfaceWeb': '网页',
-  'ds.surfaceImage': '图片',
-  'ds.surfaceVideo': '视频',
-  'ds.surfaceAudio': '音频',

  'avatar.title': '账户与设置',
  'avatar.localCli': '本机 CLI',
@@ -258,6 +215,10 @@ export const zhCN: Dict = {
  'avatar.metaOffline': '未运行',
  'avatar.metaSelected': '已选',
  'avatar.noAgentSelected': '尚未选择代理',
+  'avatar.modelSection': '模型',
+  'avatar.modelLabel': '模型',
+  'avatar.reasoningLabel': '推理',
+  'avatar.customSuffix': '（自定义）',

  'project.backToProjects': '返回项目列表',
  'project.metaFreeform': '自由设计',
@@ -392,8 +353,6 @@ export const zhCN: Dict = {
  'fileViewer.open': '打开',
  'fileViewer.imageMeta': '图片 · {size}',
  'fileViewer.sketchMeta': '草图 · {size}',
-  'fileViewer.videoMeta': '视频 · {size}',
-  'fileViewer.audioMeta': '音频 · {size}',
  'fileViewer.reload': '重新加载',
  'fileViewer.reloadDisk': '从磁盘重新加载',
  'fileViewer.copy': '复制',
@@ -93,6 +93,12 @@ export interface Dict {
  'settings.noAgentSelected': string;
  'settings.language': string;
  'settings.languageHint': string;
+  'settings.modelPicker': string;
+  'settings.reasoningPicker': string;
+  'settings.modelPickerHint': string;
+  'settings.modelCustom': string;
+  'settings.modelCustomLabel': string;
+  'settings.modelCustomPlaceholder': string;

  // Entry view / tabs
  'entry.tabDesigns': string;
@@ -104,19 +110,6 @@ export interface Dict {
  'entry.loadingWorkspace': string;

  // New project panel
-  // Top-level surface picker — sits above the existing tabs and switches
-  // the form between Web (prototype/deck/template/other), Image, Video,
-  // and Audio surfaces.
-  'newproj.surfaceLabel': string;
-  'newproj.surfaceWeb': string;
-  'newproj.surfaceImage': string;
-  'newproj.surfaceVideo': string;
-  'newproj.surfaceAudio': string;
-  'newproj.surfaceWebHint': string;
-  'newproj.surfaceImageHint': string;
-  'newproj.surfaceVideoHint': string;
-  'newproj.surfaceAudioHint': string;
-
  'newproj.tabPrototype': string;
  'newproj.tabDeck': string;
  'newproj.tabTemplate': string;
@@ -125,31 +118,6 @@ export interface Dict {
  'newproj.titleDeck': string;
  'newproj.titleTemplate': string;
  'newproj.titleOther': string;
-  'newproj.titleImage': string;
-  'newproj.titleVideo': string;
-  'newproj.titleAudio': string;
-
-  // Media-specific labels for the Image / Video / Audio forms.
-  'newproj.modelLabel': string;
-  'newproj.modelHint': string;
-  'newproj.aspectLabel': string;
-  'newproj.aspectSquare': string;
-  'newproj.aspectLandscape': string;
-  'newproj.aspectPortrait': string;
-  'newproj.aspect43': string;
-  'newproj.aspect34': string;
-  'newproj.imageStyleLabel': string;
-  'newproj.imageStylePlaceholder': string;
-  'newproj.videoLengthLabel': string;
-  'newproj.videoLengthSeconds': string;
-  'newproj.audioKindLabel': string;
-  'newproj.audioKindMusic': string;
-  'newproj.audioKindSpeech': string;
-  'newproj.audioKindSfx': string;
-  'newproj.audioDurationLabel': string;
-  'newproj.audioDurationSeconds': string;
-  'newproj.voiceLabel': string;
-  'newproj.voicePlaceholder': string;
  'newproj.namePlaceholder': string;
  'newproj.fidelityLabel': string;
  'newproj.fidelityWireframe': string;
@@ -205,17 +173,6 @@ export interface Dict {
  'examples.modePrototypeMobile': string;
  'examples.modeDeck': string;
  'examples.modeDocument': string;
-  'examples.modeImage': string;
-  'examples.modeVideo': string;
-  'examples.modeAudio': string;
-  'examples.surfaceLabel': string;
-  'examples.surfaceWeb': string;
-  'examples.surfaceImage': string;
-  'examples.surfaceVideo': string;
-  'examples.surfaceAudio': string;
-  'examples.tagImage': string;
-  'examples.tagVideo': string;
-  'examples.tagAudio': string;
  'examples.scenarioGeneral': string;
  'examples.scenarioEngineering': string;
  'examples.scenarioProduct': string;
@@ -258,12 +215,6 @@ export interface Dict {
  'ds.categoryUncategorized': string;
  'ds.showcase': string;
  'ds.tokens': string;
-  // Surface filter row in the Design systems tab.
-  'ds.surfaceLabel': string;
-  'ds.surfaceWeb': string;
-  'ds.surfaceImage': string;
-  'ds.surfaceVideo': string;
-  'ds.surfaceAudio': string;

  // Avatar menu (project topbar)
  'avatar.title': string;
@@ -279,6 +230,10 @@ export interface Dict {
  'avatar.metaOffline': string;
  'avatar.metaSelected': string;
  'avatar.noAgentSelected': string;
+  'avatar.modelSection': string;
+  'avatar.modelLabel': string;
+  'avatar.reasoningLabel': string;
+  'avatar.customSuffix': string;

  // Project view / chat pane / composer
  'project.backToProjects': string;
@@ -413,8 +368,6 @@ export interface Dict {
  'fileViewer.open': string;
  'fileViewer.imageMeta': string;
  'fileViewer.sketchMeta': string;
-  'fileViewer.videoMeta': string;
-  'fileViewer.audioMeta': string;
  'fileViewer.reload': string;
  'fileViewer.reloadDisk': string;
  'fileViewer.copy': string;
@@ -293,6 +293,45 @@ code {
  font-variant-numeric: tabular-nums;
  white-space: nowrap;
 }
+.avatar-section-label {
+  font-size: 10.5px;
+  text-transform: uppercase;
+  letter-spacing: 0.06em;
+  color: var(--text-faint);
+  font-weight: 600;
+  padding: 8px 10px 4px;
+}
+.avatar-model-section {
+  padding: 2px 10px 6px;
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+  border-top: 1px dashed var(--border-soft);
+  margin-top: 4px;
+}
+.avatar-select-row {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  font-size: 12px;
+  color: var(--text-muted);
+}
+.avatar-select-label {
+  flex-shrink: 0;
+  min-width: 64px;
+}
+.avatar-select {
+  flex: 1;
+  min-width: 0;
+  font-size: 12px;
+  padding: 4px 6px;
+  border-radius: var(--radius-sm);
+  border: 1px solid var(--border);
+  background: var(--bg-panel);
+  color: var(--text);
+  cursor: pointer;
+}
+.avatar-select:focus { outline: 2px solid var(--accent-soft, var(--border-strong)); }

 /* Environment pill — only used in entry view header now */
 .env-pill {
@@ -827,6 +866,23 @@ code {
  white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
 }
 .agent-card-meta .muted { color: var(--text-soft); font-style: italic; }
+.agent-model-row {
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+  padding: 12px;
+  border: 1px solid var(--border-soft);
+  border-radius: var(--radius-sm);
+  background: var(--bg-subtle);
+}
+.agent-model-row .field { gap: 4px; }
+.agent-model-row .field-label {
+  font-size: 11.5px;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+  color: var(--text-muted);
+}
+.agent-model-row .hint { margin: 0; font-size: 11.5px; }
 .status-dot {
  width: 8px; height: 8px;
  border-radius: 50%;
@@ -1091,212 +1147,6 @@ code {
  text-align: center;
 }

-/* -------- Surface picker (top-level Web/Image/Video/Audio) ----------- */
-.newproj-surfaces {
-  display: grid;
-  grid-template-columns: repeat(2, 1fr);
-  gap: 6px;
-  padding: 10px 10px 8px;
-  border-bottom: 1px solid var(--border);
-}
-.newproj-surface {
-  display: flex;
-  flex-direction: column;
-  align-items: flex-start;
-  gap: 4px;
-  padding: 10px 10px 9px;
-  background: var(--bg-panel);
-  border: 1px solid var(--border);
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  text-align: left;
-  color: var(--text);
-  transition: border-color 120ms ease, background 120ms ease, box-shadow 120ms ease;
-  min-width: 0;
-}
-.newproj-surface:hover:not(:disabled) { border-color: var(--border-strong); }
-.newproj-surface.active {
-  border-color: var(--accent);
-  background: var(--accent-tint);
-  box-shadow: 0 0 0 1px var(--accent);
-}
-.newproj-surface > svg { color: var(--text-muted); }
-.newproj-surface.active > svg { color: var(--accent); }
-.newproj-surface-label {
-  font-size: 12.5px;
-  font-weight: 600;
-}
-.newproj-surface-hint {
-  font-size: 10.5px;
-  color: var(--text-muted);
-  line-height: 1.3;
-  white-space: nowrap;
-  overflow: hidden;
-  text-overflow: ellipsis;
-  max-width: 100%;
-}
-
-/* -------- Model / aspect / pill grids (image/video/audio forms) ----- */
-.newproj-textarea {
-  width: 100%;
-  resize: vertical;
-  min-height: 60px;
-  padding: 10px 12px;
-  border: 1px solid var(--border);
-  border-radius: var(--radius-sm);
-  background: var(--bg-panel);
-  font: inherit;
-  font-size: 13px;
-  color: var(--text);
-  line-height: 1.45;
-}
-.newproj-textarea:focus {
-  outline: none;
-  border-color: var(--accent);
-  box-shadow: 0 0 0 1px var(--accent);
-}
-
-.model-grid {
-  display: grid;
-  grid-template-columns: 1fr 1fr;
-  gap: 6px;
-}
-.model-card {
-  display: flex;
-  flex-direction: column;
-  align-items: flex-start;
-  gap: 2px;
-  padding: 8px 10px;
-  background: var(--bg-panel);
-  border: 1px solid var(--border);
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  text-align: left;
-  transition: border-color 120ms ease, background 120ms ease;
-}
-.model-card:hover:not(:disabled) { border-color: var(--border-strong); }
-.model-card.active {
-  border-color: var(--accent);
-  background: var(--accent-tint);
-}
-.model-card-name {
-  font-size: 12.5px;
-  font-weight: 600;
-  color: var(--text);
-  font-family: var(--font-mono, ui-monospace, SFMono-Regular, Menlo, monospace);
-}
-.model-card-hint {
-  font-size: 10.5px;
-  color: var(--text-muted);
-}
-
-.aspect-grid {
-  display: grid;
-  grid-template-columns: repeat(auto-fit, minmax(64px, 1fr));
-  gap: 6px;
-}
-.aspect-card {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  gap: 6px;
-  padding: 8px 6px 9px;
-  background: var(--bg-panel);
-  border: 1px solid var(--border);
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: border-color 120ms ease, background 120ms ease;
-}
-.aspect-card:hover:not(:disabled) { border-color: var(--border-strong); }
-.aspect-card.active {
-  border-color: var(--accent);
-  background: var(--accent-tint);
-}
-.aspect-thumb {
-  background: var(--bg-subtle);
-  border: 1px solid var(--border-soft);
-  border-radius: 3px;
-  display: block;
-}
-.aspect-thumb-1x1 { width: 24px; height: 24px; }
-.aspect-thumb-16x9 { width: 32px; height: 18px; }
-.aspect-thumb-9x16 { width: 18px; height: 32px; }
-.aspect-thumb-4x3 { width: 28px; height: 21px; }
-.aspect-thumb-3x4 { width: 21px; height: 28px; }
-.aspect-label {
-  font-size: 10.5px;
-  color: var(--text-muted);
-  text-align: center;
-  white-space: nowrap;
-}
-.aspect-card.active .aspect-label { color: var(--text); }
-
-.pill-grid {
-  display: flex;
-  flex-wrap: wrap;
-  gap: 6px;
-}
-.pill-grid-btn {
-  padding: 6px 12px;
-  background: var(--bg-panel);
-  border: 1px solid var(--border);
-  border-radius: 999px;
-  font-size: 12px;
-  color: var(--text);
-  cursor: pointer;
-  transition: border-color 120ms ease, background 120ms ease;
-}
-.pill-grid-btn:hover:not(:disabled) { border-color: var(--border-strong); }
-.pill-grid-btn.active {
-  border-color: var(--accent);
-  background: var(--accent-tint);
-  color: var(--text);
-  font-weight: 500;
-}
-
-/* -------- Video / audio viewers -------------------------------------- */
-.video-body, .audio-body {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  padding: 24px;
-  background: var(--bg-subtle);
-  min-height: 0;
-  flex: 1;
-}
-.video-body video {
-  max-width: 100%;
-  max-height: 100%;
-  border-radius: var(--radius-sm);
-  background: #000;
-  box-shadow: var(--shadow-md, 0 8px 28px rgba(0, 0, 0, 0.18));
-}
-.audio-card {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  gap: 10px;
-  padding: 28px 32px;
-  background: var(--bg-panel);
-  border: 1px solid var(--border);
-  border-radius: var(--radius);
-  min-width: 280px;
-  max-width: 480px;
-  width: 100%;
-  box-shadow: var(--shadow-xs);
-  color: var(--text-muted);
-}
-.audio-card-name {
-  font-size: 13px;
-  font-weight: 500;
-  color: var(--text);
-  word-break: break-all;
-  text-align: center;
-}
-.audio-card audio {
-  width: 100%;
-}
-
 /* -------- Fidelity cards (prototype tab) ---------------------------- */
 .fidelity-grid {
  display: grid;
@@ -1,107 +0,0 @@
-/**
- * Single source of truth for the media-generation model registry.
- *
- * Both the frontend (NewProjectPanel model pickers) and the daemon
- * (od media generate dispatcher) consume this list. When you add a new
- * model entry here, the picker shows it AND the daemon can route to it —
- * the unifying contract is "skills + metadata + prompt → code agent →
- * od media generate", and this file pins down what `--model` IDs the
- * agent is allowed to pass.
- *
- * The daemon imports the JSON view of this file via fs.readFile so we
- * don't fork the registry between frontend and Node code paths.
- */
-
-import type { AudioKind, MediaAspect } from '../types';
-
-export interface MediaModel {
-  /** Stable ID used in metadata.imageModel / videoModel / audioModel. */
-  id: string;
-  /** Short label shown in pickers — usually equals id. */
-  label: string;
-  /** Vendor / context hint shown under the label. */
-  hint: string;
-  /**
-   * Capabilities the agent may rely on when planning. Used downstream by
-   * the dispatcher to decide which provider call to make.
-   */
-  caps?: string[];
-}
-
-export const IMAGE_MODELS: MediaModel[] = [
-  { id: 'gpt-image-2', label: 'gpt-image-2', hint: 'OpenAI · default', caps: ['t2i', 'i2i', 'inpaint'] },
-  { id: 'flux-1.1-pro', label: 'flux-1.1-pro', hint: 'Black Forest Labs', caps: ['t2i', 'i2i'] },
-  { id: 'imagen-4', label: 'imagen-4', hint: 'Google', caps: ['t2i'] },
-  { id: 'midjourney-v7', label: 'midjourney-v7', hint: 'Midjourney', caps: ['t2i'] },
-];
-
-export const VIDEO_MODELS: MediaModel[] = [
-  { id: 'seedance-2', label: 'seedance-2', hint: 'ByteDance · default', caps: ['t2v', 'i2v'] },
-  { id: 'kling-3', label: 'kling-3', hint: 'Kuaishou', caps: ['t2v', 'i2v'] },
-  { id: 'kling-4', label: 'kling-4', hint: 'Kuaishou · latest', caps: ['t2v', 'i2v'] },
-  { id: 'veo-3', label: 'veo-3', hint: 'Google', caps: ['t2v'] },
-  { id: 'sora-2', label: 'sora-2', hint: 'OpenAI', caps: ['t2v'] },
-];
-
-export const AUDIO_MODELS_BY_KIND: Record<AudioKind, MediaModel[]> = {
-  music: [
-    { id: 'suno-v5', label: 'suno-v5', hint: 'Suno · default', caps: ['music'] },
-    { id: 'udio-v2', label: 'udio-v2', hint: 'Udio', caps: ['music'] },
-    { id: 'lyria-2', label: 'lyria-2', hint: 'Google', caps: ['music'] },
-  ],
-  speech: [
-    { id: 'minimax-tts', label: 'minimax-tts', hint: 'MiniMax · default', caps: ['tts'] },
-    { id: 'fish-speech-2', label: 'fish-speech-2', hint: 'FishAudio', caps: ['tts', 'voice-clone'] },
-    { id: 'elevenlabs-v3', label: 'elevenlabs-v3', hint: 'ElevenLabs', caps: ['tts', 'voice-clone'] },
-  ],
-  sfx: [
-    { id: 'elevenlabs-sfx', label: 'elevenlabs-sfx', hint: 'ElevenLabs SFX', caps: ['sfx'] },
-    { id: 'audiocraft', label: 'audiocraft', hint: 'Meta · open', caps: ['sfx', 'music'] },
-  ],
-};
-
-export const MEDIA_ASPECTS: MediaAspect[] = ['1:1', '16:9', '9:16', '4:3', '3:4'];
-
-export const VIDEO_LENGTHS_SEC: number[] = [3, 5, 8, 10, 15, 30];
-export const AUDIO_DURATIONS_SEC: number[] = [5, 10, 15, 30, 60, 120];
-
-export const DEFAULT_IMAGE_MODEL = IMAGE_MODELS[0]!.id;
-export const DEFAULT_VIDEO_MODEL = VIDEO_MODELS[0]!.id;
-export const DEFAULT_AUDIO_MODEL: Record<AudioKind, string> = {
-  music: AUDIO_MODELS_BY_KIND.music[0]!.id,
-  speech: AUDIO_MODELS_BY_KIND.speech[0]!.id,
-  sfx: AUDIO_MODELS_BY_KIND.sfx[0]!.id,
-};
-
-/**
- * Look up a model record across all surfaces by ID. Returns null if the
- * agent passes an unknown model — the dispatcher rejects with a clear
- * error so the agent re-plans instead of silently falling back.
- */
-export function findMediaModel(id: string): MediaModel | null {
-  const all: MediaModel[] = [
-    ...IMAGE_MODELS,
-    ...VIDEO_MODELS,
-    ...AUDIO_MODELS_BY_KIND.music,
-    ...AUDIO_MODELS_BY_KIND.speech,
-    ...AUDIO_MODELS_BY_KIND.sfx,
-  ];
-  return all.find((m) => m.id === id) ?? null;
-}
-
-/** All model IDs grouped by surface, used for prompt-side disclosure. */
-export function modelIdsBySurface(): {
-  image: string[];
-  video: string[];
-  audio: { music: string[]; speech: string[]; sfx: string[] };
-} {
-  return {
-    image: IMAGE_MODELS.map((m) => m.id),
-    video: VIDEO_MODELS.map((m) => m.id),
-    audio: {
-      music: AUDIO_MODELS_BY_KIND.music.map((m) => m.id),
-      speech: AUDIO_MODELS_BY_KIND.speech.map((m) => m.id),
-      sfx: AUDIO_MODELS_BY_KIND.sfx.map((m) => m.id),
-    },
-  };
-}
@@ -1,135 +0,0 @@
-/**
- * Media generation contract. Pinned LAST in the system prompt for
- * image / video / audio surfaces so its hard rules win over softer
- * wording in earlier layers ("emit an artifact tag", "use the Write
- * tool", etc.).
- *
- * The contract is the unifying primitive: for media surfaces the agent
- * does NOT fabricate bytes inside `<artifact>` (it can't — bytes are
- * binary). Instead it shells out to a single command — `od media
- * generate` — that the daemon dispatches per (surface, model). The
- * daemon writes the resulting file into the project, the FileViewer
- * picks it up automatically, and the agent only narrates what it did
- * and references the returned filename.
- *
- * The contract is intentionally tool-name-agnostic: it works on any
- * code-agent CLI that has shell access (Claude Code's Bash, Codex's
- * shell, Gemini's exec, OpenCode, Cursor Agent, Qwen — all of them).
- * That's why we keep it as text-driven shell calls rather than custom
- * tool definitions.
- */
-import {
-  AUDIO_MODELS_BY_KIND,
-  IMAGE_MODELS,
-  VIDEO_MODELS,
-} from '../media/models';
-
-function fmtList(ids: string[]): string {
-  return ids.map((id) => `\`${id}\``).join(', ');
-}
-
-const IMAGE_IDS = fmtList(IMAGE_MODELS.map((m) => m.id));
-const VIDEO_IDS = fmtList(VIDEO_MODELS.map((m) => m.id));
-const AUDIO_MUSIC_IDS = fmtList(AUDIO_MODELS_BY_KIND.music.map((m) => m.id));
-const AUDIO_SPEECH_IDS = fmtList(AUDIO_MODELS_BY_KIND.speech.map((m) => m.id));
-const AUDIO_SFX_IDS = fmtList(AUDIO_MODELS_BY_KIND.sfx.map((m) => m.id));
-
-export const MEDIA_GENERATION_CONTRACT = `
---
-
-## Media generation contract (load-bearing — overrides softer wording above)
-
-This project is a **non-web** surface (image / video / audio). The unifying
-contract is: skill workflow + project metadata tell you WHAT to make; one
-shell command — \`od media generate\` — is HOW you actually produce bytes.
-Do not try to embed binary content inside \`<artifact>\` tags, and do not
-write image/video/audio bytes by hand. Always call out to the dispatcher.
-
-### Environment the daemon injected for you
-
-The daemon spawns you with these env vars set (verify with \`echo\`):
-
- \`OD_BIN\`         — absolute path to the \`od\` CLI script. Run with \`node "$OD_BIN" …\`.
- \`OD_PROJECT_ID\`  — the active project's id. Pass it as \`--project "$OD_PROJECT_ID"\`.
- \`OD_PROJECT_DIR\` — the project's files folder (your cwd). Generated files land here.
- \`OD_DAEMON_URL\`  — base URL of the local daemon, e.g. \`http://127.0.0.1:7456\`.
-
-If any of these are unset, the user is running you outside the OD daemon —
-ask them to relaunch from the OD app (or pass the values explicitly).
-
-### Invocation
-
-Run via your shell tool (Bash on Claude Code, exec on Codex/Gemini, etc.):
-
-\`\`\`bash
-node "$OD_BIN" media generate \\
-  --project "$OD_PROJECT_ID" \\
-  --surface <image|video|audio> \\
-  --model <model-id> \\
-  --output <filename> \\
-  --prompt "<full prompt>" \\
-  [--aspect 1:1|16:9|9:16|4:3|3:4] \\
-  [--length <seconds>]              # video only
-  [--duration <seconds>]            # audio only
-  [--audio-kind music|speech|sfx]   # audio only
-  [--voice <voice-id>]              # audio:speech only
-\`\`\`
-
-The command prints a single line of JSON describing the written file:
-
-\`\`\`json
-{ "file": { "name": "poster.png", "size": 12345, "kind": "image", "mime": "image/png", ... } }
-\`\`\`
-
-Save the \`file.name\` and reference it in your reply ("I generated
-\`poster.png\`."). The user's FileViewer renders it automatically.
-
-### Allowed model IDs (per surface)
-
- **image**:   ${IMAGE_IDS}
- **video**:   ${VIDEO_IDS}
- **audio · music**:  ${AUDIO_MUSIC_IDS}
- **audio · speech**: ${AUDIO_SPEECH_IDS}
- **audio · sfx**:    ${AUDIO_SFX_IDS}
-
-If the user requests a model that is not in this list, surface a warning
-in your reply and either (a) ask them to pick a registered ID or (b)
-proceed with the project metadata's default model and explain the
-substitution. Do not silently fall back.
-
-### Workflow rules
-
-1. **Read project metadata first.** The "Project metadata" block above
-   tells you the user's pre-selected model, aspect, length, voice, audio
-   kind, etc. Treat those as authoritative defaults — only override if
-   the user's chat message explicitly contradicts them.
-2. **One discovery turn before generating.** Even with metadata defaults
-   present, restate what you're about to make and ask one targeted
-   question if anything is ambiguous (subject, mood, brand, voice). The
-   discovery rules from the philosophy layer still apply — emit a
-   question form on turn 1 unless the user's prompt already pins every
-   variable.
-3. **Generate by shell, narrate in chat.** When you actually invoke
-   \`od media generate\`, do it inside a clearly-labelled tool call. After
-   it returns, write a short reply: what was produced, the filename,
-   and any notes (model substitutions, retries, follow-up suggestions).
-4. **Iterate by re-running.** To revise, call \`od media generate\` again
-   with a new \`--output\` filename (or omit \`--output\` to auto-name).
-   Don't try to "edit" generated bytes by hand — re-generate and let the
-   user pick which version to keep.
-5. **Don't emit \`<artifact>\` blocks for media.** They're for HTML/text
-   artifacts. For media surfaces your "artifact" is the file written by
-   the dispatcher. The artifact lint and PDF-stitching layers don't
-   apply.
-6. **Filenames are slugged.** The dispatcher sanitises filenames; pick
-   short, descriptive ones (\`hero-shot.png\`, \`intro-jingle.mp3\`,
-   \`teaser-15s.mp4\`) so the user's file list stays readable.
-
-### Stub-provider note
-
-The provider integrations behind specific models (gpt-image-2,
-seedance-2, suno-v5, …) may still be stubs in this build — the
-dispatcher will return success and a placeholder file. That's fine: the
-contract you follow is the same; the bytes get sharper as real
-provider integrations land. The user has been told to expect this.
-`;
@@ -33,22 +33,13 @@ import type { ProjectMetadata, ProjectTemplate } from '../types';
 import { OFFICIAL_DESIGNER_PROMPT } from './official-system';
 import { DISCOVERY_AND_PHILOSOPHY } from './discovery';
 import { DECK_FRAMEWORK_DIRECTIVE } from './deck-framework';
-import { MEDIA_GENERATION_CONTRACT } from './media-contract';

 export const BASE_SYSTEM_PROMPT = OFFICIAL_DESIGNER_PROMPT;

 export interface ComposeInput {
  skillBody?: string | undefined;
  skillName?: string | undefined;
-  skillMode?:
-    | 'prototype'
-    | 'deck'
-    | 'template'
-    | 'design-system'
-    | 'image'
-    | 'video'
-    | 'audio'
-    | undefined;
+  skillMode?: 'prototype' | 'deck' | 'template' | 'design-system' | undefined;
  designSystemBody?: string | undefined;
  designSystemTitle?: string | undefined;
  // Project-level metadata captured by the new-project panel. Drives the
@@ -120,24 +111,6 @@ export function composeSystemPrompt({
    parts.push(`\n\n---\n\n${DECK_FRAMEWORK_DIRECTIVE}`);
  }

-  // Image / video / audio surfaces share one invocation contract:
-  // `od media generate`. We pin it LAST (and only when the project is
-  // actually a media surface) so its rules ("don't fabricate bytes",
-  // "shell out to OD_BIN", "reference the returned filename") override
-  // any softer wording earlier in the stack about emitting <artifact>
-  // tags. We fire on either skillMode OR metadata.kind so a media
-  // project without a bound skill still gets the contract.
-  const isMediaSurface =
-    skillMode === 'image' ||
-    skillMode === 'video' ||
-    skillMode === 'audio' ||
-    metadata?.kind === 'image' ||
-    metadata?.kind === 'video' ||
-    metadata?.kind === 'audio';
-  if (isMediaSurface) {
-    parts.push(MEDIA_GENERATION_CONTRACT);
-  }
-
  return parts.join('');
 }

@@ -172,56 +145,6 @@ function renderMetadataBlock(
      lines.push(`- **template**: ${metadata.templateLabel}`);
    }
  }
-  if (metadata.kind === 'image') {
-    lines.push(
-      `- **imageModel**: ${metadata.imageModel ?? '(unknown — ask: which image model to use)'}`,
-    );
-    lines.push(
-      `- **aspectRatio**: ${metadata.imageAspect ?? '(unknown — ask: 1:1, 16:9, 9:16, 4:3, 3:4)'}`,
-    );
-    if (metadata.imageStyle) {
-      lines.push(`- **styleNotes**: ${metadata.imageStyle}`);
-    }
-    lines.push('');
-    lines.push(
-      'This is an **image** project. Plan the prompt carefully — describe subject, composition, lighting, palette, and references — then dispatch via the **media generation contract** (see the contract block at the end of this prompt) using `od media generate --surface image --model <imageModel>`. Reference the returned filename in your reply. Do NOT emit `<artifact>` HTML for media surfaces.',
-    );
-  }
-  if (metadata.kind === 'video') {
-    lines.push(
-      `- **videoModel**: ${metadata.videoModel ?? '(unknown — ask: which video model to use)'}`,
-    );
-    lines.push(
-      `- **lengthSeconds**: ${typeof metadata.videoLength === 'number' ? metadata.videoLength : '(unknown — ask: 3s / 5s / 10s)'}`,
-    );
-    lines.push(
-      `- **aspectRatio**: ${metadata.videoAspect ?? '(unknown — ask: 16:9, 9:16, 1:1)'}`,
-    );
-    lines.push('');
-    lines.push(
-      'This is a **video** project. Plan the shotlist (1-3 shots for short clips), describe motion + camera, then dispatch via the **media generation contract** using `od media generate --surface video --model <videoModel> --length <seconds> --aspect <ratio>`. If the active workspace also ships a hyperframes-style interactive-video skill, prefer composing several shorter clips into a timeline rather than one monolithic generation. Do NOT emit `<artifact>` HTML.',
-    );
-  }
-  if (metadata.kind === 'audio') {
-    lines.push(
-      `- **audioKind**: ${metadata.audioKind ?? '(unknown — ask: music / speech / sfx)'}`,
-    );
-    lines.push(
-      `- **audioModel**: ${metadata.audioModel ?? '(unknown — ask: which audio model to use)'}`,
-    );
-    lines.push(
-      `- **durationSeconds**: ${typeof metadata.audioDuration === 'number' ? metadata.audioDuration : '(unknown — ask: target duration)'}`,
-    );
-    if (metadata.voice) {
-      lines.push(`- **voice**: ${metadata.voice}`);
-    } else if (metadata.audioKind === 'speech') {
-      lines.push('- **voice**: (unknown — ask: voice / accent / pacing)');
-    }
-    lines.push('');
-    lines.push(
-      'This is an **audio** project. Music: lock genre + tempo + instrumentation. Speech: confirm script + voice + pacing. SFX: be precise about texture (impact, ambience, foley layer). Then dispatch via the **media generation contract** using `od media generate --surface audio --audio-kind <kind> --model <audioModel> --duration <seconds>` (add `--voice <voice-id>` for speech). Do NOT emit `<artifact>` HTML.',
-    );
-  }

  if (metadata.inspirationDesignSystemIds && metadata.inspirationDesignSystemIds.length > 0) {
    lines.push(
@@ -30,6 +30,11 @@ export interface DaemonStreamOptions {
  // daemon resolves them inside the project folder, validates they
  // exist, and stitches them into the user message as `@<path>` hints.
  attachments?: string[];
+  // Per-CLI model + reasoning the user picked in the model menu. Both are
+  // optional; the daemon validates them against the agent's declared
+  // options and falls back to the CLI default when missing.
+  model?: string | null;
+  reasoning?: string | null;
 }

 export async function streamViaDaemon({
@@ -40,6 +45,8 @@ export async function streamViaDaemon({
  handlers,
  projectId,
  attachments,
+  model,
+  reasoning,
 }: DaemonStreamOptions): Promise<void> {
  // Local CLIs are single-turn print-mode programs, so we collapse the whole
  // chat into one string. If this becomes too noisy for long histories, the
@@ -53,6 +60,8 @@ export async function streamViaDaemon({
    message: transcript,
    projectId: projectId ?? null,
    attachments: attachments ?? [],
+    model: model ?? null,
+    reasoning: reasoning ?? null,
  });

  let acc = '';
@@ -11,6 +11,7 @@ export const DEFAULT_CONFIG: AppConfig = {
  skillId: null,
  designSystemId: null,
  onboardingCompleted: false,
+  agentModels: {},
 };

 export function loadConfig(): AppConfig {
@@ -1,5 +1,14 @@
 export type ExecMode = 'daemon' | 'api';

+// Per-CLI model + reasoning the user picked in the model menu. Each agent
+// keeps its own slot so flipping between Codex and Gemini doesn't reset the
+// other one's choice. Missing entries fall back to the agent's first
+// declared model (typically `'default'`, i.e. let the CLI pick).
+export interface AgentModelChoice {
+  model?: string;
+  reasoning?: string;
+}
+
 export interface AppConfig {
  mode: ExecMode;
  apiKey: string;
@@ -12,6 +21,10 @@ export interface AppConfig {
  // least once (saved or skipped). Bootstrap skips the auto-popup when
  // this is set so refreshing the page doesn't re-prompt.
  onboardingCompleted?: boolean;
+  // Per-CLI model picker state, keyed by agent id (e.g. `gemini`, `codex`).
+  // Pre-existing configs without this field fall through to the agent's
+  // declared default.
+  agentModels?: Record<string, AgentModelChoice>;
 }

 export type AgentEvent =
@@ -63,6 +76,11 @@ export interface ExamplePreview {
  html: string;
 }

+export interface AgentModelOption {
+  id: string;
+  label: string;
+}
+
 export interface AgentInfo {
  id: string;
  name: string;
@@ -70,36 +88,20 @@ export interface AgentInfo {
  available: boolean;
  path?: string;
  version?: string | null;
+  // Models surfaced in the model picker for this CLI. The first entry is
+  // treated as the default (typically the synthetic `'default'` option,
+  // meaning "let the CLI use whatever's in its own config").
+  models?: AgentModelOption[];
+  // Reasoning-effort presets — currently only Codex exposes these.
+  reasoningOptions?: AgentModelOption[];
 }

-// The four "surfaces" Open Design now produces. Web covers HTML
-// prototypes, decks, docs and templates; Image / Video / Audio cover
-// generated visual / motion / sound artifacts respectively. Every skill
-// and every design system declares one surface; the Examples and
-// Design-systems tabs filter by it so users can navigate the multi-modal
-// catalog without scrolling past surfaces they're not interested in.
-export type Surface = 'web' | 'image' | 'video' | 'audio';
-
 export interface SkillSummary {
  id: string;
  name: string;
  description: string;
  triggers: string[];
-  // 'design-system' is a meta-mode used by the design-systems registry,
-  // not by user-facing skills; the rest map 1:1 onto ProjectKind. Image
-  // / video / audio modes drive the matching project kind so the
-  // 'Use this prompt' fast-create produces a coherent media project.
-  mode:
-    | 'prototype'
-    | 'deck'
-    | 'template'
-    | 'design-system'
-    | 'image'
-    | 'video'
-    | 'audio';
-  /** Which output surface the skill targets — defaults to 'web' for
-   *  backward compatibility when SKILL.md doesn't declare `od.surface`. */
-  surface?: Surface;
+  mode: 'prototype' | 'deck' | 'template' | 'design-system';
  platform?: 'desktop' | 'mobile' | null;
  scenario?: string | null;
  previewType: string;
@@ -135,10 +137,6 @@ export interface DesignSystemSummary {
  /** 4 representative hex strings extracted from DESIGN.md: [bg, support, fg, accent].
   *  Empty when DESIGN.md doesn't expose its tokens in the bold-and-hex format. */
  swatches?: string[];
-  /** Which surface the system targets. Web is the default — most ship
-   *  HTML/CSS tokens. Image / video / audio systems carry palettes,
-   *  shotlists, voice presets etc. that drive non-web generations. */
-  surface?: Surface;
 }

 export interface DesignSystemDetail extends DesignSystemSummary {
@@ -148,8 +146,6 @@ export interface DesignSystemDetail extends DesignSystemSummary {
 export type ProjectFileKind =
  | 'html'
  | 'image'
-  | 'video'
-  | 'audio'
  | 'sketch'
  | 'text'
  | 'code'
@@ -175,28 +171,7 @@ export interface ProjectFile {
 // Per-project metadata captured at creation time. The agent reads this
 // during chat (via the system prompt) and the question-form re-asks for
 // any field that's missing. Each `kind` carries a different shape.
-//
-// 'prototype' / 'deck' / 'template' / 'other' all live on the Web
-// surface; 'image' / 'video' / 'audio' are the new media surfaces.
-export type ProjectKind =
-  | 'prototype'
-  | 'deck'
-  | 'template'
-  | 'other'
-  | 'image'
-  | 'video'
-  | 'audio';
-
-// Aspect ratios offered to image / video projects. Kept as a small fixed
-// vocabulary (vs free-form WxH) so the system prompt can describe them
-// to the agent in concrete terms, and so we can render fixed thumbnails
-// in the picker without a custom-input branch.
-export type MediaAspect = '1:1' | '16:9' | '9:16' | '4:3' | '3:4';
-
-// Audio kind — what *kind* of sound the user wants. The model + prompt
-// pattern differ noticeably between music (Suno-style), TTS (MiniMax,
-// Fish), and SFX/foley, so we capture the intent at create time.
-export type AudioKind = 'music' | 'speech' | 'sfx';
+export type ProjectKind = 'prototype' | 'deck' | 'template' | 'other';

 export interface ProjectMetadata {
  kind: ProjectKind;
@@ -221,35 +196,6 @@ export interface ProjectMetadata {
  // generated artifact should *also* draw from. Empty / undefined when the
  // user stayed in single-select mode.
  inspirationDesignSystemIds?: string[];
-
-  // -- Image projects ------------------------------------------------
-  // The model the user wants generations to flow through. We keep this
-  // as a free-form string (rather than a strict enum) so new providers
-  // can be wired up by editing skills alone, without a frontend change.
-  imageModel?: string;
-  // Aspect ratio. Defaults to 1:1 if unset. Drives the canvas the agent
-  // requests from the underlying image API.
-  imageAspect?: MediaAspect;
-  // Free-form palette / mood hint. Carried into the system prompt so the
-  // agent can echo the user's style intent into the upstream prompt.
-  imageStyle?: string;
-
-  // -- Video projects ------------------------------------------------
-  videoModel?: string;
-  // Length in seconds. Most providers cap at 10s today; we don't enforce
-  // here — the skill body is the right place to clamp by model.
-  videoLength?: number;
-  videoAspect?: MediaAspect;
-
-  // -- Audio projects ------------------------------------------------
-  audioKind?: AudioKind;
-  audioModel?: string;
-  // Duration in seconds. Music generators interpret this as song length;
-  // TTS uses it as an upper bound on the spoken passage.
-  audioDuration?: number;
-  // Free-form voice description for TTS (e.g. "warm female narrator,
-  // British English"). Ignored for music / SFX.
-  voice?: string;
 }

 export interface Project {