ac70719d4d
Extends Open Design from web-only to a multi-modal creation tool. The unifying contract is one code-agent loop driven by skills + project metadata + prompt constraints; for non-web surfaces the agent shells out to a single dispatcher (`od media generate`) that the daemon routes per (surface, model). - Types: new Surface union, MediaAspect / AudioKind, image/video/audio ProjectKind + ProjectMetadata fields, video/audio ProjectFileKind. - NewProjectPanel: top-level surface picker + Image / Video / Audio forms with model, aspect, length, duration, voice, audio-kind pickers. - ExamplesTab + DesignSystemsTab: surface filter row that scopes before mode / scenario / category filters. - FileViewer / FileWorkspace: native <video> and <audio> previews and matching tab icons. - Daemon: parses `od.surface` and `> Surface:` blockquotes; recognises mp4 / webm / mov / mp3 / wav / ogg / m4a / flac extensions; spawns agents with OD_BIN / OD_DAEMON_URL / OD_PROJECT_ID / OD_PROJECT_DIR env so any code-agent CLI with shell access can call the dispatcher. - daemon/media.js + daemon/media-models.js: surface-agnostic dispatcher with stub providers that emit deterministic placeholder bytes (1x1 PNG, valid mp4 ftyp, mp3 frame / silent WAV) so the framework works without API keys; real provider integrations slot in later. - daemon/cli.js: `od media generate --surface ... --model ...` subcommand routes to POST /api/projects/:id/media/generate and prints one JSON line for the agent to parse. - prompts/media-contract.ts: hard contract pinned LAST in the system prompt for image/video/audio surfaces — env vars, exact invocation, registered model IDs per surface, six workflow rules. system.ts metadata block updated to point at the contract. - Seed skills: image-poster, video-shortform, audio-jingle each ship a SKILL.md with `mode/surface: image|video|audio` and a stylized example.html preview, and instruct the agent to dispatch via the contract. Made-with: Cursor
63 lines
2.7 KiB
JavaScript
63 lines
2.7 KiB
JavaScript
// Daemon-side mirror of src/media/models.ts. We keep this in plain JS so
// node imports are native and the daemon never needs a TS toolchain at
// runtime. The two files are kept in sync by review — any model added to
// src/media/models.ts must be added here too. Tests in verify ensure the
// arrays are non-empty and IDs are unique.
|
|
|
|
/**
 * Catalog of image models registered with the daemon.
 *
 * Every entry has the same shape: an `id`, a display `label`, a short
 * free-text `hint`, and a `caps` array of capability tags.
 */
export const IMAGE_MODELS = [
  {
    id: 'gpt-image-2',
    label: 'gpt-image-2',
    hint: 'OpenAI · default',
    caps: ['t2i', 'i2i', 'inpaint'],
  },
  {
    id: 'flux-1.1-pro',
    label: 'flux-1.1-pro',
    hint: 'Black Forest Labs',
    caps: ['t2i', 'i2i'],
  },
  {
    id: 'imagen-4',
    label: 'imagen-4',
    hint: 'Google',
    caps: ['t2i'],
  },
  {
    id: 'midjourney-v7',
    label: 'midjourney-v7',
    hint: 'Midjourney',
    caps: ['t2i'],
  },
];
|
|
|
|
/**
 * Catalog of video models registered with the daemon.
 *
 * Every entry has the same shape: an `id`, a display `label`, a short
 * free-text `hint`, and a `caps` array of capability tags.
 */
export const VIDEO_MODELS = [
  {
    id: 'seedance-2',
    label: 'seedance-2',
    hint: 'ByteDance · default',
    caps: ['t2v', 'i2v'],
  },
  {
    id: 'kling-3',
    label: 'kling-3',
    hint: 'Kuaishou',
    caps: ['t2v', 'i2v'],
  },
  {
    id: 'kling-4',
    label: 'kling-4',
    hint: 'Kuaishou · latest',
    caps: ['t2v', 'i2v'],
  },
  {
    id: 'veo-3',
    label: 'veo-3',
    hint: 'Google',
    caps: ['t2v'],
  },
  {
    id: 'sora-2',
    label: 'sora-2',
    hint: 'OpenAI',
    caps: ['t2v'],
  },
];
|
|
|
|
/**
 * Audio model catalogs, grouped under the keys `music`, `speech` and `sfx`.
 *
 * Each group is an array of entries with an `id`, a display `label`, a
 * short free-text `hint`, and a `caps` array of capability tags.
 */
export const AUDIO_MODELS_BY_KIND = {
  music: [
    {
      id: 'suno-v5',
      label: 'suno-v5',
      hint: 'Suno · default',
      caps: ['music'],
    },
    {
      id: 'udio-v2',
      label: 'udio-v2',
      hint: 'Udio',
      caps: ['music'],
    },
    {
      id: 'lyria-2',
      label: 'lyria-2',
      hint: 'Google',
      caps: ['music'],
    },
  ],
  speech: [
    {
      id: 'minimax-tts',
      label: 'minimax-tts',
      hint: 'MiniMax · default',
      caps: ['tts'],
    },
    {
      id: 'fish-speech-2',
      label: 'fish-speech-2',
      hint: 'FishAudio',
      caps: ['tts', 'voice-clone'],
    },
    {
      id: 'elevenlabs-v3',
      label: 'elevenlabs-v3',
      hint: 'ElevenLabs',
      caps: ['tts', 'voice-clone'],
    },
  ],
  sfx: [
    {
      id: 'elevenlabs-sfx',
      label: 'elevenlabs-sfx',
      hint: 'ElevenLabs SFX',
      caps: ['sfx'],
    },
    {
      id: 'audiocraft',
      label: 'audiocraft',
      hint: 'Meta · open',
      caps: ['sfx', 'music'],
    },
  ],
};
|
|
|
|
/** Aspect-ratio options, written as `width:height` strings. */
export const MEDIA_ASPECTS = [
  '1:1',
  '16:9',
  '9:16',
  '4:3',
  '3:4',
];

/** Video length options, in seconds. */
export const VIDEO_LENGTHS_SEC = [
  3,
  5,
  8,
  10,
  15,
  30,
];

/** Audio duration options, in seconds. */
export const AUDIO_DURATIONS_SEC = [
  5,
  10,
  15,
  30,
  60,
  120,
];
|
|
|
|
/**
 * Look up a model entry by id across every catalog in this module.
 *
 * Catalogs are scanned in a fixed order — image, video, then the audio
 * groups (music, speech, sfx) — and the first entry whose `id` is
 * strictly equal to the argument is returned.
 *
 * @param {string} id - Model identifier to search for.
 * @returns {object|null} The matching entry, or `null` if no catalog has it.
 */
export function findMediaModel(id) {
  const catalogs = [
    IMAGE_MODELS,
    VIDEO_MODELS,
    AUDIO_MODELS_BY_KIND.music,
    AUDIO_MODELS_BY_KIND.speech,
    AUDIO_MODELS_BY_KIND.sfx,
  ];
  for (const catalog of catalogs) {
    for (const entry of catalog) {
      if (entry.id === id) {
        return entry;
      }
    }
  }
  return null;
}
|
|
|
|
/**
 * Return the model catalog for a given surface.
 *
 * - `'image'` yields `IMAGE_MODELS`, `'video'` yields `VIDEO_MODELS`.
 * - `'audio'` yields the `AUDIO_MODELS_BY_KIND` group named by `audioKind`.
 *   A missing/falsy or unrecognised `audioKind` falls back to the `music`
 *   group.
 * - Any other surface yields a fresh empty array.
 *
 * @param {string} surface - Surface name (`'image'`, `'video'`, `'audio'`, ...).
 * @param {string} [audioKind] - Audio group key; only read when `surface` is `'audio'`.
 * @returns {Array<object>} The catalog array for the surface (the shared
 *   module-level array, not a copy), or `[]` for an unknown surface.
 */
export function modelsForSurface(surface, audioKind) {
  if (surface === 'image') return IMAGE_MODELS;
  if (surface === 'video') return VIDEO_MODELS;
  if (surface === 'audio') {
    const kind = audioKind || 'music';
    // Only accept keys defined directly on the catalog. A plain `obj[kind]`
    // lookup would also resolve inherited names such as 'constructor',
    // 'toString' or '__proto__' and hand back a non-array value.
    const isKnownKind = Object.prototype.hasOwnProperty.call(AUDIO_MODELS_BY_KIND, kind);
    return isKnownKind ? AUDIO_MODELS_BY_KIND[kind] : AUDIO_MODELS_BY_KIND.music;
  }
  return [];
}
|