From a001a219023846db174c5fb50bb993bc0af0bea2 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 17 Apr 2026 00:41:46 -0500
Subject: [PATCH] MCP self-orientation: /context + /verify + architecture resources
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Any agent (Claude Code via MCP stdio, or sub-agents via HTTP :3700) can
now self-orient without human explanation:

GET /context returns:
- System purpose and name
- All datasets with row counts
- All vector indexes with backends
- Available models and their strengths
- Complete tool list with rules
- Current VRAM state

POST /verify fact-checks any claim about a worker against the golden
data. Agent says "worker 1313 is a Forklift Operator in IL with
reliability 0.82" → endpoint returns verified=true/false with exact
discrepancies.

MCP resources (stdio path for Claude Code):
- lakehouse://system — live system status
- lakehouse://architecture — full PRD
- lakehouse://instructions — agent operating manual
- lakehouse://playbooks — successful operations database
- lakehouse://datasets — dataset listing

This is the "command and control" layer J asked for: any agent
connecting to this system gets the context it needs to operate
independently. No human intermediary required.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review amendments to the original patch:
* /verify: worker_id must be all digits before it is interpolated into SQL.
* /verify: NULL or non-numeric values no longer pass numeric checks; claimed
  fields missing from the worker row are returned in `unchecked`.
* lakehouse://system and /context: independent reads run via Promise.all.
* lakehouse://system: row counts and VRAM figures of 0 are shown instead of "?".
Line structure and indentation were rebuilt from a whitespace-collapsed copy,
so context-line whitespace may need `git apply --ignore-whitespace`. The
post-image blob id on the index.ts `index` line predates these amendments.

 .mcp.json           |   3 +-
 mcp-server/index.ts | 149 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/.mcp.json b/.mcp.json
index 9071808..6c33296 100644
--- a/.mcp.json
+++ b/.mcp.json
@@ -4,7 +4,8 @@
       "command": "bun",
       "args": ["run", "/home/profit/lakehouse/mcp-server/index.ts"],
       "env": {
-        "LAKEHOUSE_URL": "http://localhost:3100"
+        "LAKEHOUSE_URL": "http://localhost:3100",
+        "MCP_TRANSPORT": "stdio"
       }
     }
   }
diff --git a/mcp-server/index.ts b/mcp-server/index.ts
index 3bcb7c7..a572356 100644
--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@@ -172,7 +172,78 @@ server.tool(
   },
 );
 
-// Resources
+// Resources — these give any MCP client full context about the system
+
+server.resource("lakehouse://system", "lakehouse://system", async (uri) => {
+  // The six status reads are independent of each other, so issue them concurrently.
+  const [health, datasets, indexes, vram, agent, buckets] = await Promise.all([
+    api("GET", "/health"),
+    api("GET", "/catalog/datasets"),
+    api("GET", "/vectors/indexes"),
+    api("GET", "/ai/vram"),
+    api("GET", "/vectors/agent/status"),
+    api("GET", "/storage/buckets"),
+  ]);
+
+  const text = `# Lakehouse System Status
+
+## Health: ${health === "lakehouse ok" ? "OK" : JSON.stringify(health)}
+
+## Datasets (${datasets.length})
+${datasets.map((d: any) => `- ${d.name}: ${d.row_count ?? "?"} rows`).join("\n")}
+
+## Vector Indexes (${indexes.length})
+${(indexes as any[]).map((i: any) => `- ${i.index_name}: ${i.chunk_count} chunks (${i.vector_backend || "parquet"})`).join("\n")}
+
+## GPU
+- Used: ${vram?.gpu?.used_mib ?? "?"}/${vram?.gpu?.total_mib ?? "?"} MiB
+- Models loaded: ${(vram?.ollama_loaded || []).map((m: any) => m.name).join(", ") || "none"}
+
+## Autotune Agent
+- Running: ${agent?.running}, Trials: ${agent?.trials_run}, Promotions: ${agent?.promotions}
+
+## Buckets (${(buckets as any[])?.length || 0})
+${(buckets as any[] || []).map((b: any) => `- ${b.name}: ${b.backend} (${b.reachable ? "reachable" : "DOWN"})`).join("\n")}
+
+## Services
+- Lakehouse Gateway: :3100
+- AI Sidecar: :3200
+- Agent Gateway: :3700
+- Langfuse: :3001
+- MinIO S3: :9000
+- Ollama: :11434
+
+## Available Models
+- qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
+- qwen2.5: 7B, 8K context (best for fast SQL generation)
+- mistral: 7B, 8K context (general generation)
+- nomic-embed-text: 137M (embedding, automatic)
+`;
+  return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
+});
+
+server.resource("lakehouse://architecture", "lakehouse://architecture", async (uri) => {
+  // Read the PRD directly
+  const prd = await Bun.file("/home/profit/lakehouse/docs/PRD.md").text().catch(() => "PRD not found");
+  return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: prd }] };
+});
+
+server.resource("lakehouse://instructions", "lakehouse://instructions", async (uri) => {
+  const instructions = await Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => "Instructions not found");
+  return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: instructions }] };
+});
+
+server.resource("lakehouse://playbooks", "lakehouse://playbooks", async (uri) => {
+  const r = await api("POST", "/query/sql", {
+    sql: "SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT 20"
+  });
+  const rows = r?.rows || [];
+  const text = rows.length === 0
+    ? "No playbooks yet. Log successful operations with the log_success tool."
+    : rows.map((p: any) => `## ${p.operation}\n- Approach: ${p.approach}\n- Result: ${p.result}\n- Context: ${p.context || "—"}\n`).join("\n");
+  return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: `# Successful Playbooks\n\n${text}` }] };
+});
+
 server.resource("lakehouse://datasets", "lakehouse://datasets", async (uri) => {
   const r = await api("GET", "/catalog/datasets") as any[];
   const text = r.map(d => `${d.name}: ${d.row_count || "?"} rows`).join("\n");
@@ -204,7 +275,81 @@ async function main() {
 
       try {
         // Health
-        if (url.pathname === "/health") return ok({ status: "ok", lakehouse: BASE, tools: 9 });
+        if (url.pathname === "/health") return ok({ status: "ok", lakehouse: BASE, tools: 11 });
+
+        // Self-orientation: any agent calls this first to understand the system
+        if (url.pathname === "/context") {
+          // The file read and the three API reads are independent; run them concurrently.
+          const [instructions, datasets, indexes, vram] = await Promise.all([
+            Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => ""),
+            api("GET", "/catalog/datasets"),
+            api("GET", "/vectors/indexes"),
+            api("GET", "/ai/vram"),
+          ]);
+          return ok({
+            system: "Lakehouse Staffing Co-Pilot",
+            purpose: "AI anticipates staffing coordinator needs — pre-matches workers to contracts, surfaces alerts, builds playbooks from successful operations",
+            instructions: instructions.slice(0, 3000),
+            datasets: (datasets || []).map((d: any) => ({ name: d.name, rows: d.row_count })),
+            indexes: (indexes || []).map((i: any) => ({ name: i.index_name, chunks: i.chunk_count, backend: i.vector_backend })),
+            models: { qwen3: "8.2B reasoning+tools", qwen2_5: "7B fast SQL", mistral: "7B generation", nomic: "137M embedding" },
+            vram: vram?.gpu,
+            tools: ["/search","/sql","/match","/worker/:id","/ask","/log","/playbooks","/profile/:id","/vram","/context","/verify"],
+            rules: [
+              "Never hallucinate — only state facts from tool responses",
+              "SQL for counts/aggregations, hybrid /search for matching",
+              "Log every successful operation to /log",
+              "Check /playbooks before complex tasks",
+              "Verify worker details via /worker/:id before communicating",
+            ],
+          });
+        }
+
+        // Verification endpoint — agent can check any claim against SQL
+        if (url.pathname === "/verify") {
+          const b = await json();
+          // b.claim: "worker 4925 is a Forklift Operator in IL with reliability 0.82"
+          // b.worker_id: 4925
+          // b.checks: { role: "Forklift Operator", state: "IL", reliability: 0.82 }
+          if (!b.worker_id) return err("worker_id required");
+          // worker_id is untrusted request input interpolated into the SQL below:
+          // accept only a plain run of digits so it cannot carry SQL.
+          const workerId = String(b.worker_id).trim();
+          if (!/^\d+$/.test(workerId)) return err("worker_id must be a non-negative integer");
+          const r = await api("POST", "/query/sql", {
+            sql: `SELECT * FROM ethereal_workers WHERE worker_id = ${workerId}`
+          });
+          const worker = r?.rows?.[0];
+          if (!worker) return ok({ verified: false, reason: `worker ${b.worker_id} not found` });
+
+          const checks = b.checks || {};
+          const failures: string[] = [];
+          // Claimed fields that are absent from the worker row, so nothing was compared.
+          const unchecked: string[] = [];
+          for (const [field, expected] of Object.entries(checks)) {
+            const actual = worker[field];
+            if (actual === undefined) {
+              unchecked.push(field);
+              continue;
+            }
+            if (typeof expected === "number") {
+              // Written as !(diff <= tol) so a NULL or non-numeric actual (NaN diff) is a mismatch.
+              if (actual === null || !(Math.abs(Number(actual) - expected) <= 0.05)) {
+                failures.push(`${field}: claimed=${expected} actual=${actual}`);
+              }
+            } else if (String(actual).toLowerCase() !== String(expected).toLowerCase()) {
+              failures.push(`${field}: claimed=${expected} actual=${actual}`);
+            }
+          }
+          return ok({
+            verified: failures.length === 0,
+            worker_id: b.worker_id,
+            worker_name: worker.name,
+            failures,
+            unchecked,
+            actual: worker,
+          });
+        }
 
         // Tool: hybrid search
         if (url.pathname === "/search") {