Optimize MCP tool token usage with schema reference pattern

Reduces MCP tool token consumption by ~90% through progressive disclosure. Tools now show minimal schemas with get_schema() for details on demand.
2025-12-17 22:47:30 -05:00
parent c2742d5664
commit c4af31f48d
6 changed files with 366 additions and 134 deletions
@@ -212,3 +212,118 @@ help(topic="all")  # Get complete guide
 - ALWAYS get timeline context to understand what was happening
 - ALWAYS use `get_observations` when fetching 2+ observations
 - The workflow is optimized: search → timeline → batch fetch = 10-100x faster
+
+---
+
+## Tool Reference
+
+Comprehensive parameter documentation for all memory tools. For MCP usage, call `help(topic="search")` to load specific tool docs.
+
+### search
+
+Search across all memory types (observations, sessions, prompts).
+
+**Parameters:**
+
+- `query` (string, optional) - Search term for full-text search
+- `limit` (number, optional) - Maximum results to return. Default: 20, Max: 100
+- `offset` (number, optional) - Number of results to skip. Default: 0
+- `project` (string, optional) - Project name to filter by
+- `type` (string, optional) - Filter by type: "observations", "sessions", "prompts"
+- `dateStart` (string, optional) - Start date filter (YYYY-MM-DD or epoch ms)
+- `dateEnd` (string, optional) - End date filter (YYYY-MM-DD or epoch ms)
+- `obs_type` (string, optional) - Filter observations by type (comma-separated): bugfix, feature, decision, discovery, change
+- `orderBy` (string, optional) - Sort order: "date_desc" (default), "date_asc", "relevance"
+
+**Returns:** Table of results with IDs, timestamps, types, titles
+
+### timeline
+
+Get chronological context around a specific point in time or observation.
+
+**Parameters:**
+
+- `anchor` (number, optional) - Observation ID to center timeline around. If not provided, uses most recent result from query
+- `query` (string, optional) - Search term to find anchor automatically (if anchor not provided)
+- `depth_before` (number, optional) - Items before anchor. Default: 5, Max: 20
+- `depth_after` (number, optional) - Items after anchor. Default: 5, Max: 20
+- `project` (string, optional) - Project name to filter by
+
+**Returns:** Exactly `depth_before + 1 + depth_after` items in chronological order, with observations, sessions, and prompts interleaved
+
+### get_recent_context
+
+Get the most recent observations from current or recent sessions.
+
+**Parameters:**
+
+- `limit` (number, optional) - Maximum observations to return. Default: 10, Max: 50
+- `project` (string, optional) - Project name to filter by
+
+**Returns:** Recent observations in reverse chronological order
+
+### get_context_timeline
+
+Get timeline context around a specific observation ID.
+
+**Parameters:**
+
+- `anchor` (number, required) - Observation ID to center timeline around
+- `depth_before` (number, optional) - Items before anchor. Default: 5, Max: 20
+- `depth_after` (number, optional) - Items after anchor. Default: 5, Max: 20
+- `project` (string, optional) - Project name to filter by
+
+**Returns:** Timeline items centered on the anchor observation
+
+### get_observation
+
+Fetch a single observation by ID with full details.
+
+**Parameters:**
+
+- `id` (number, required) - Observation ID to fetch
+
+**Returns:** Complete observation object with title, subtitle, narrative, facts, concepts, files, timestamps
+
+### get_observations
+
+Batch fetch multiple observations by IDs. Always prefer this over individual fetches for 2+ observations.
+
+**Parameters:**
+
+- `ids` (array of numbers, required) - Array of observation IDs to fetch
+- `orderBy` (string, optional) - Sort order: "date_desc" (default), "date_asc"
+- `limit` (number, optional) - Maximum observations to return. Default: no limit
+- `project` (string, optional) - Project name to filter by
+
+**Returns:** Array of complete observation objects, 10-100x faster than individual fetches
+
+### get_session
+
+Fetch a single session by ID with metadata.
+
+**Parameters:**
+
+- `id` (number, required) - Session ID to fetch (just the number, not "S2005" format)
+
+**Returns:** Session object with ID, start time, end time, project, model info
+
+### get_prompt
+
+Fetch a single prompt by ID with full text.
+
+**Parameters:**
+
+- `id` (number, required) - Prompt ID to fetch
+
+**Returns:** Prompt object with ID, text, timestamp, session reference
+
+### help
+
+Load detailed instructions for specific topics or all documentation.
+
+**Parameters:**
+
+- `topic` (string, optional) - Specific topic to load: "workflow", "search", "timeline", "get_recent_context", "get_context_timeline", "get_observation", "get_observations", "get_session", "get_prompt", "all". Default: "all"
+
+**Returns:** Formatted documentation for the requested topic
@@ -12,8 +12,6 @@ import {
  CallToolRequestSchema,
  ListToolsRequestSchema,
 } from '@modelcontextprotocol/sdk/types.js';
-import { z } from 'zod';
-import { zodToJsonSchema } from 'zod-to-json-schema';
 import { logger } from '../utils/logger.js';
 import { getWorkerPort, getWorkerHost } from '../shared/worker-utils.js';

@@ -35,6 +33,72 @@ const TOOL_ENDPOINT_MAP: Record<string, string> = {
  'help': '/api/instructions'
 };

+/**
+ * Detailed parameter schemas keyed by tool name; get_schema returns an entry as JSON text so each tool's advertised inputSchema can stay minimal. `required: true` marks mandatory parameters.
+ */
+const TOOL_SCHEMAS: Record<string, any> = {
+  search: {
+    query: { type: 'string', description: 'Full-text search query' },
+    type: { type: 'string', description: 'Filter by type: observations, sessions, prompts' },
+    obs_type: { type: 'string', description: 'Observation type filter' },
+    concepts: { type: 'string', description: 'Comma-separated concept tags' },
+    files: { type: 'string', description: 'Comma-separated file paths' },
+    project: { type: 'string', description: 'Project name filter' },
+    dateStart: { type: ['string', 'number'], description: 'Start date (ISO or timestamp)' },
+    dateEnd: { type: ['string', 'number'], description: 'End date (ISO or timestamp)' },
+    limit: { type: 'number', description: 'Max results (default: 20, max: 100)' },
+    offset: { type: 'number', description: 'Result offset for pagination' },
+    orderBy: { type: 'string', description: 'Sort order: date_desc (default), date_asc, relevance' }
+  },
+  timeline: {
+    query: { type: 'string', description: 'Search query to find anchor point' },
+    anchor: { type: 'number', description: 'Observation ID as timeline center' },
+    depth_before: { type: 'number', description: 'Observations before anchor (default: 5)' },
+    depth_after: { type: 'number', description: 'Observations after anchor (default: 5)' },
+    type: { type: 'string', description: 'Filter by type' },
+    concepts: { type: 'string', description: 'Comma-separated concept tags' },
+    files: { type: 'string', description: 'Comma-separated file paths' },
+    project: { type: 'string', description: 'Project name filter' }
+  },
+  get_recent_context: {
+    limit: { type: 'number', description: 'Max results (default: 20)' }, // NOTE(review): the removed zod schema defaulted to 30 and the Tool Reference docs say 10 — confirm the worker's real default
+    type: { type: 'string', description: 'Filter by type' },
+    concepts: { type: 'string', description: 'Comma-separated concept tags' },
+    files: { type: 'string', description: 'Comma-separated file paths' },
+    project: { type: 'string', description: 'Project name filter' },
+    dateStart: { type: ['string', 'number'], description: 'Start date' },
+    dateEnd: { type: ['string', 'number'], description: 'End date' }
+  },
+  get_context_timeline: {
+    anchor: { type: 'number', description: 'Observation ID (required)', required: true },
+    depth_before: { type: 'number', description: 'Observations before anchor' },
+    depth_after: { type: 'number', description: 'Observations after anchor' },
+    type: { type: 'string', description: 'Filter by type' },
+    concepts: { type: 'string', description: 'Comma-separated concept tags' },
+    files: { type: 'string', description: 'Comma-separated file paths' },
+    project: { type: 'string', description: 'Project name filter' }
+  },
+  get_observations: {
+    ids: { type: 'array', items: { type: 'number' }, description: 'Array of observation IDs (required)', required: true },
+    orderBy: { type: 'string', description: 'Sort order: date_desc, date_asc' },
+    limit: { type: 'number', description: 'Max results' },
+    project: { type: 'string', description: 'Project filter' }
+  },
+  help: {
+    operation: { type: 'string', description: 'Operation type: "observations", "timeline", "sessions", etc.' },
+    topic: { type: 'string', description: 'Specific topic for help' }
+  },
+  get_observation: {
+    id: { type: 'number', description: 'Observation ID (required)', required: true }
+  },
+  get_session: {
+    id: { type: 'number', description: 'Session ID (required)', required: true }
+  },
+  get_prompt: {
+    id: { type: 'number', description: 'Prompt ID (required)', required: true }
+  }
+};
+
 /**
 * Call Worker HTTP API endpoint
 */
@@ -182,25 +246,47 @@ async function verifyWorkerConnection(): Promise<boolean> {

 /**
 * Tool definitions with HTTP-based handlers
- * Descriptions removed - use progressive_description tool for parameter documentation
+ * Minimal descriptions - call get_schema(tool_name) for parameter details, or help() (topic/operation) for longer docs
 */
 const tools = [
+  {
+    name: 'get_schema',
+    description: 'Get parameter schema for a tool. Call get_schema(tool_name) for details',
+    inputSchema: {
+      type: 'object',
+      properties: { tool_name: { type: 'string' } },
+      required: ['tool_name']
+    },
+    handler: async (args: any) => {
+      // Own-property check: only keys defined directly on TOOL_SCHEMAS count as tools, so non-strings and inherited names such as "constructor" or "__proto__" are rejected
+      const toolName = args.tool_name;
+      if (typeof toolName !== 'string' || !Object.hasOwn(TOOL_SCHEMAS, toolName)) {
+        return {
+          content: [{
+            type: 'text' as const,
+            text: `Unknown tool: ${toolName}\n\nAvailable tools: ${Object.keys(TOOL_SCHEMAS).join(', ')}` // lists every key of TOOL_SCHEMAS so the caller can retry with a valid name
+          }],
+          isError: true
+        };
+      }
+
+      const schema = TOOL_SCHEMAS[toolName];
+      return {
+        content: [{
+          type: 'text' as const,
+          text: `# ${toolName} Parameters\n\n${JSON.stringify(schema, null, 2)}` // heading line followed by the schema as 2-space-indented JSON
+        }]
+      };
+    }
+  },
  {
    name: 'search',
-    description: 'Search memory',
-    inputSchema: z.object({
-      query: z.string().optional(),
-      type: z.enum(['observations', 'sessions', 'prompts']).optional(),
-      obs_type: z.string().optional(),
-      concepts: z.string().optional(),
-      files: z.string().optional(),
-      project: z.string().optional(),
-      dateStart: z.union([z.string(), z.number()]).optional(),
-      dateEnd: z.union([z.string(), z.number()]).optional(),
-      limit: z.number().min(1).max(100).default(20),
-      offset: z.number().min(0).default(0),
-      orderBy: z.enum(['relevance', 'date_desc', 'date_asc']).default('date_desc')
-    }),
+    description: 'Search memory. All parameters optional - call get_schema("search") for details',
+    inputSchema: {
+      type: 'object',
+      properties: {},
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      const endpoint = TOOL_ENDPOINT_MAP['search'];
      return await callWorkerAPI(endpoint, args);
@@ -208,17 +294,12 @@ const tools = [
  },
  {
    name: 'timeline',
-    description: 'Timeline context',
-    inputSchema: z.object({
-      query: z.string().optional(),
-      anchor: z.number().optional(),
-      depth_before: z.number().min(0).max(100).default(10),
-      depth_after: z.number().min(0).max(100).default(10),
-      type: z.string().optional(),
-      concepts: z.string().optional(),
-      files: z.string().optional(),
-      project: z.string().optional()
-    }),
+    description: 'Timeline context. All parameters optional - call get_schema("timeline") for details',
+    inputSchema: {
+      type: 'object',
+      properties: {},
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      const endpoint = TOOL_ENDPOINT_MAP['timeline'];
      return await callWorkerAPI(endpoint, args);
@@ -226,16 +307,12 @@ const tools = [
  },
  {
    name: 'get_recent_context',
-    description: 'Recent context',
-    inputSchema: z.object({
-      limit: z.number().min(1).max(100).default(30),
-      type: z.string().optional(),
-      concepts: z.string().optional(),
-      files: z.string().optional(),
-      project: z.string().optional(),
-      dateStart: z.union([z.string(), z.number()]).optional(),
-      dateEnd: z.union([z.string(), z.number()]).optional()
-    }),
+    description: 'Recent context. All parameters optional - call get_schema("get_recent_context") for details',
+    inputSchema: {
+      type: 'object',
+      properties: {},
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      const endpoint = TOOL_ENDPOINT_MAP['get_recent_context'];
      return await callWorkerAPI(endpoint, args);
@@ -243,16 +320,18 @@ const tools = [
  },
  {
    name: 'get_context_timeline',
-    description: 'Timeline around ID',
-    inputSchema: z.object({
-      anchor: z.number(),
-      depth_before: z.number().min(0).max(100).default(10),
-      depth_after: z.number().min(0).max(100).default(10),
-      type: z.string().optional(),
-      concepts: z.string().optional(),
-      files: z.string().optional(),
-      project: z.string().optional()
-    }),
+    description: 'Timeline around observation ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        anchor: {
+          type: 'number',
+          description: 'Observation ID (required). Optional params: get_schema("get_context_timeline")'
+        }
+      },
+      required: ['anchor'],
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      const endpoint = TOOL_ENDPOINT_MAP['get_context_timeline'];
      return await callWorkerAPI(endpoint, args);
@@ -260,10 +339,12 @@ const tools = [
  },
  {
    name: 'help',
-    description: 'Usage help',
-    inputSchema: z.object({
-      topic: z.enum(['workflow', 'search_params', 'examples', 'all']).default('all')
-    }),
+    description: 'Get detailed docs. All parameters optional - call get_schema("help") for details',
+    inputSchema: {
+      type: 'object',
+      properties: {},
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      const endpoint = TOOL_ENDPOINT_MAP['help'];
      return await callWorkerAPI(endpoint, args);
@@ -271,43 +352,70 @@ const tools = [
  },
  {
    name: 'get_observation',
-    description: 'Fetch by ID',
-    inputSchema: z.object({
-      id: z.number()
-    }),
+    description: 'Fetch observation by ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        id: {
+          type: 'number',
+          description: 'Observation ID (required)'
+        }
+      },
+      required: ['id']
+    },
    handler: async (args: any) => {
      return await callWorkerAPIWithPath('/api/observation', args.id);
    }
  },
  {
    name: 'get_observations',
-    description: 'Batch fetch',
-    inputSchema: z.object({
-      ids: z.array(z.number()),
-      orderBy: z.enum(['date_desc', 'date_asc']).optional(),
-      limit: z.number().optional(),
-      project: z.string().optional()
-    }),
+    description: 'Batch fetch observations',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        ids: {
+          type: 'array',
+          items: { type: 'number' },
+          description: 'Array of observation IDs (required). Optional params: get_schema("get_observations")'
+        }
+      },
+      required: ['ids'],
+      additionalProperties: true
+    },
    handler: async (args: any) => {
      return await callWorkerAPIPost('/api/observations/batch', args);
    }
  },
  {
    name: 'get_session',
-    description: 'Session by ID',
-    inputSchema: z.object({
-      id: z.number()
-    }),
+    description: 'Fetch session by ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        id: {
+          type: 'number',
+          description: 'Session ID (required)'
+        }
+      },
+      required: ['id']
+    },
    handler: async (args: any) => {
      return await callWorkerAPIWithPath('/api/session', args.id);
    }
  },
  {
    name: 'get_prompt',
-    description: 'Prompt by ID',
-    inputSchema: z.object({
-      id: z.number()
-    }),
+    description: 'Fetch prompt by ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        id: {
+          type: 'number',
+          description: 'Prompt ID (required)'
+        }
+      },
+      required: ['id']
+    },
    handler: async (args: any) => {
      return await callWorkerAPIWithPath('/api/prompt', args.id);
    }
@@ -333,7 +441,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
    tools: tools.map(tool => ({
      name: tool.name,
      description: tool.description,
-      inputSchema: zodToJsonSchema(tool.inputSchema) as Record<string, unknown>
+      inputSchema: tool.inputSchema
    }))
  };
 });
@@ -178,26 +178,35 @@ export class WorkerService {
    // Instructions endpoint - loads SKILL.md sections on-demand for progressive instruction loading
    this.app.get('/api/instructions', async (req, res) => {
      const topic = (req.query.topic as string) || 'all';
-      // Read SKILL.md from plugin directory
+      const operation = req.query.operation as string | undefined;
+
      // Path resolution: __dirname is build output directory (plugin/scripts/)
      // SKILL.md is at plugin/skills/mem-search/SKILL.md
-      const skillPath = path.join(__dirname, '../skills/mem-search/SKILL.md');
+      // Operations are at plugin/skills/mem-search/operations/*.md

      try {
-        const fullContent = await fs.promises.readFile(skillPath, 'utf-8');
+        let content: string;

-        // Extract section based on topic
-        const section = this.extractInstructionSection(fullContent, topic);
+        if (operation) {
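+          // Load specific operation file. NOTE(review): `operation` comes straight from the query string and is joined into the path unvalidated - restrict it to known operation names (or reject path separators and "..") before reading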
+          const operationPath = path.join(__dirname, '../skills/mem-search/operations', `${operation}.md`);
+          content = await fs.promises.readFile(operationPath, 'utf-8');
+        } else {
+          // Load SKILL.md and extract section based on topic (backward compatibility)
+          const skillPath = path.join(__dirname, '../skills/mem-search/SKILL.md');
+          const fullContent = await fs.promises.readFile(skillPath, 'utf-8');
+          content = this.extractInstructionSection(fullContent, topic);
+        }

        // Return in MCP format
        res.json({
          content: [{
            type: 'text',
-            text: section
+            text: content
          }]
        });
      } catch (error) {
-        logger.error('WORKER', 'Failed to load instructions', { topic, skillPath }, error as Error);
+        logger.error('WORKER', 'Failed to load instructions', { topic, operation }, error as Error);
        res.status(500).json({
          content: [{
            type: 'text',