t1k-config-multimodal.json

registryVersion: 1 · priority: 91

Multimodal skill config — model defaults for Gemini, Imagen, Veo, and MiniMax, plus an optional human-mcp MCP server entry.

Raw JSON (ground truth)

{
  "description": "Multimodal skill config — model defaults for Gemini, Imagen, Veo, and MiniMax, plus an optional human-mcp MCP server entry.",
  "kitName": "theonekit-core",
  "mcp": {
    "optional": [
      {
        "installCmd": "claude mcp add human-mcp -- npx -y github:The1Studio/human-mcp#v2.15.1",
        "name": "human-mcp",
        "purpose": "In-loop multimodal analysis and generation — image/video analysis, Gemini image gen, video gen. Complements the t1k:extended-multimodal skill for interactive (non-batch) use. Requires GOOGLE_API_KEY env var.",
        "verifyTool": "mcp__human-mcp__eyes_analyze"
      }
    ],
    "recommended": [],
    "required": []
  },
  "moduleScope": "t1k-extended-multimodal",
  "multimodal": {
    "gemini": {
      "analysisModel": "gemini-2.5-flash",
      "generationModel": "gemini-3.1-flash-image-preview",
      "proModel": "gemini-2.5-pro",
      "videoModel": "veo-3.1-generate-preview"
    },
    "imagen": {
      "fastModel": "imagen-4.0-fast-generate-001",
      "standardModel": "imagen-4.0-generate-001",
      "ultraModel": "imagen-4.0-ultra-generate-001"
    },
    "minimax": {
      "imageModel": "image-01",
      "musicModel": "music-2.5",
      "speechModel": "speech-2.8-hd",
      "videoModel": "MiniMax-Hailuo-2.3"
    }
  },
  "priority": 91,
  "registryVersion": 1
}