feat(media): add image/video/audio project kinds via od media generate
Introduce non-web media surfaces (image, video, audio) as first-class project kinds. The unifying contract is "skill workflow + project metadata tell the agent WHAT to make; one shell command — `od media generate` — is HOW bytes are produced", so any code-agent CLI with shell access can drive it without bespoke tools.

- Frontend: New Project panel gains Image/Video/Audio tabs with model picker, aspect/length/duration controls, and audio kind/voice selection. Examples and Design Systems tabs gain layered sections. FileViewer renders the generated image/video/audio files.
- Shared registry: src/media/models.ts is the single source of truth for image/video/audio model IDs, aspects, and defaults — consumed by the picker AND the daemon dispatcher.
- Prompts: media-contract.ts is pinned LAST in the system prompt for media surfaces so its hard rules (call `od media generate`, don't emit binary in `<artifact>`, allowed model IDs) win over softer earlier wording.
- Daemon: new media.js dispatcher + media-models.js JSON view of the registry; cli.js gets the `od media generate` subcommand wired up via server.js / projects.js so the daemon writes files back into the project dir.
- Skills: audio-jingle, image-poster, video-shortform seed examples for the three surfaces.

Made-with: Cursor
This commit is contained in:
@@ -25,12 +25,16 @@ export async function listSkills(skillsRoot) {
|
||||
const { data, body } = parseFrontmatter(raw);
|
||||
const hasAttachments = await dirHasAttachments(dir);
|
||||
const mode = data.od?.mode || inferMode(body, data.description);
|
||||
const surface = normalizeSurface(data.od?.surface, mode);
|
||||
out.push({
|
||||
id: data.name || entry.name,
|
||||
name: data.name || entry.name,
|
||||
description: data.description || "",
|
||||
triggers: Array.isArray(data.triggers) ? data.triggers : [],
|
||||
mode,
|
||||
// Surface defaults to inferring from `mode` so legacy SKILL.md
|
||||
// files (no `od.surface` declared) keep classifying correctly.
|
||||
surface,
|
||||
platform: normalizePlatform(
|
||||
data.od?.platform,
|
||||
mode,
|
||||
@@ -159,6 +163,20 @@ function inferMode(body, description) {
|
||||
return "prototype";
|
||||
}
|
||||
|
||||
// Surface is the high-level output bucket — web, image, video or audio.
// Authors can pin it via `od.surface`; otherwise we derive from `mode`,
// then fall back to the safe default ('web') so existing skills classify
// unchanged.
const KNOWN_SURFACES = new Set(["web", "image", "video", "audio"]);

/**
 * Resolve the surface bucket for a skill.
 *
 * Candidates are tried in priority order: the explicit surface value first,
 * then `mode`. Each candidate is trimmed and lower-cased before being looked
 * up in KNOWN_SURFACES, so hand-authored values such as "Video" or " audio "
 * are recognised whichever field they arrive in. Candidates that are not
 * strings, or that do not name a known surface, are ignored.
 *
 * @param {unknown} value - Explicitly declared surface; may be absent or invalid.
 * @param {unknown} mode - Skill mode, used as a fallback when it names a surface.
 * @returns {string} One of "web", "image", "video" or "audio"; "web" when
 *   neither input names a known surface.
 */
function normalizeSurface(value, mode) {
  for (const candidate of [value, mode]) {
    // Non-string input (undefined, null, numbers, objects) can never name a surface.
    if (typeof candidate !== "string") continue;
    const normalized = candidate.trim().toLowerCase();
    if (KNOWN_SURFACES.has(normalized)) return normalized;
  }
  return "web";
}
|
||||
|
||||
// Validate platform tag — only desktop / mobile are meaningful for the
|
||||
// Examples gallery. Falls back to autodetecting "mobile" from descriptions
|
||||
// so legacy skills sort under the right pill without authoring changes.
|
||||
|
||||
Reference in New Issue
Block a user