From a001a219023846db174c5fb50bb993bc0af0bea2 Mon Sep 17 00:00:00 2001
From: root <root@island37.com>
Date: Fri, 17 Apr 2026 00:41:46 -0500
Subject: [PATCH] MCP self-orientation: /context + /verify + architecture
 resources
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Any agent (Claude Code via MCP stdio, or sub-agents via HTTP :3700)
can now self-orient without human explanation:

GET /context returns:
  - System purpose and name
  - All datasets with row counts
  - All vector indexes with backends
  - Available models and their strengths
  - Complete tool list with rules
  - Current VRAM state

POST /verify fact-checks a claim about a worker against the golden
data. For example, an agent claiming "worker 1313 is a Forklift Operator
in IL with reliability 0.82" sends the worker_id plus the claimed fields
as a checks object, and the endpoint returns verified=true/false with
the exact discrepancies.

MCP resources (stdio path for Claude Code):
  - lakehouse://system — live system status
  - lakehouse://architecture — full PRD
  - lakehouse://instructions — agent operating manual
  - lakehouse://playbooks — successful operations database
  - lakehouse://datasets — dataset listing

This is the "command and control" layer J asked for: any agent
connecting to this system gets the context it needs to operate
independently. No human intermediary required.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .mcp.json           |   3 +-
 mcp-server/index.ts | 132 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/.mcp.json b/.mcp.json
index 9071808..6c33296 100644
--- a/.mcp.json
+++ b/.mcp.json
@@ -4,7 +4,8 @@
       "command": "bun",
       "args": ["run", "/home/profit/lakehouse/mcp-server/index.ts"],
       "env": {
-        "LAKEHOUSE_URL": "http://localhost:3100"
+        "LAKEHOUSE_URL": "http://localhost:3100",
+        "MCP_TRANSPORT": "stdio"
       }
     }
   }
diff --git a/mcp-server/index.ts b/mcp-server/index.ts
index 3bcb7c7..a572356 100644
--- a/mcp-server/index.ts
+++ b/mcp-server/index.ts
@@ -172,7 +172,75 @@ server.tool(
   },
 );
 
-// Resources
+// Resources — these give any MCP client full context about the system
+
+// Live system status rendered as Markdown from six gateway endpoints.
+server.resource("lakehouse://system", "lakehouse://system", async (uri) => {
+  // None of the six reads depends on another, so they are issued concurrently.
+  const [health, datasets, indexes, vram, agent, buckets] = await Promise.all([
+    api("GET", "/health"), api("GET", "/catalog/datasets"), api("GET", "/vectors/indexes"),
+    api("GET", "/ai/vram"), api("GET", "/vectors/agent/status"), api("GET", "/storage/buckets"),
+  ]);
+  // `??` (not `||`) for counts so a genuine 0 is printed as 0 instead of "?".
+  const text = `# Lakehouse System Status
+
+## Health: ${health === "lakehouse ok" ? "OK" : JSON.stringify(health)}
+
+## Datasets (${datasets.length})
+${datasets.map((d: any) => `- ${d.name}: ${d.row_count ?? "?"} rows`).join("\n")}
+
+## Vector Indexes (${indexes.length})
+${indexes.map((i: any) => `- ${i.index_name}: ${i.chunk_count} chunks (${i.vector_backend || "parquet"})`).join("\n")}
+
+## GPU
+- Used: ${vram?.gpu?.used_mib ?? "?"}/${vram?.gpu?.total_mib ?? "?"} MiB
+- Models loaded: ${(vram?.ollama_loaded || []).map((m: any) => m.name).join(", ") || "none"}
+
+## Autotune Agent
+- Running: ${agent?.running}, Trials: ${agent?.trials_run}, Promotions: ${agent?.promotions}
+
+## Buckets (${buckets?.length || 0})
+${(buckets || []).map((b: any) => `- ${b.name}: ${b.backend} (${b.reachable ? "reachable" : "DOWN"})`).join("\n")}
+
+## Services
+- Lakehouse Gateway: :3100
+- AI Sidecar: :3200
+- Agent Gateway: :3700
+- Langfuse: :3001
+- MinIO S3: :9000
+- Ollama: :11434
+
+## Available Models
+- qwen3: 8.2B, 40K context, thinking+tools (best for reasoning)
+- qwen2.5: 7B, 8K context (best for fast SQL generation)
+- mistral: 7B, 8K context (general generation)
+- nomic-embed-text: 137M (embedding, automatic)
+`;
+  return { contents: [{ uri: uri.href, mimeType: "text/plain", text }] };
+});
+
+// Serves the PRD file verbatim as Markdown; if reading it fails the body is "PRD not found".
+server.resource("lakehouse://architecture", "lakehouse://architecture", async (uri) => {
+  const body = await Bun.file("/home/profit/lakehouse/docs/PRD.md").text().catch(() => "PRD not found");
+  return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: body }] };
+});
+
+// Serves AGENT_INSTRUCTIONS.md as Markdown; if reading it fails the body is "Instructions not found".
+server.resource("lakehouse://instructions", "lakehouse://instructions", async (uri) => ({
+  contents: [{ uri: uri.href, mimeType: "text/markdown", text: await Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => "Instructions not found") }],
+}));
+
+// Markdown digest of up to 20 successful_playbooks rows, ordered by timestamp descending.
+server.resource("lakehouse://playbooks", "lakehouse://playbooks", async (uri) => {
+  const recentSql = "SELECT * FROM successful_playbooks ORDER BY timestamp DESC LIMIT 20";
+  const playbooks = (await api("POST", "/query/sql", { sql: recentSql }))?.rows || [];
+  // One "## operation" section per row; a missing context is rendered as an em dash.
+  const toSection = (p: any) => `## ${p.operation}\n- Approach: ${p.approach}\n- Result: ${p.result}\n- Context: ${p.context || "—"}\n`;
+  let body = "No playbooks yet. Log successful operations with the log_success tool.";
+  if (playbooks.length !== 0) body = playbooks.map(toSection).join("\n");
+  return { contents: [{ uri: uri.href, mimeType: "text/markdown", text: `# Successful Playbooks\n\n${body}` }] };
+});
+
 server.resource("lakehouse://datasets", "lakehouse://datasets", async (uri) => {
   const r = await api("GET", "/catalog/datasets") as any[];
   const text = r.map(d => `${d.name}: ${d.row_count || "?"} rows`).join("\n");
@@ -204,7 +272,67 @@ async function main() {
 
       try {
         // Health
-        if (url.pathname === "/health") return ok({ status: "ok", lakehouse: BASE, tools: 9 });
+        if (url.pathname === "/health") return ok({ status: "ok", lakehouse: BASE, tools: 11 }); // NOTE(review): 11 equals the number of entries in the /context "tools" list — presumably meant to stay in sync; confirm
+
+        // Self-orientation: one JSON snapshot an agent can fetch first to learn the system
+        if (url.pathname === "/context") {
+          const manual = await Bun.file("/home/profit/lakehouse/mcp-server/AGENT_INSTRUCTIONS.md").text().catch(() => "");
+          const datasetList = (await api("GET", "/catalog/datasets") as any[]) || [];
+          const indexList = (await api("GET", "/vectors/indexes") as any[]) || [];
+          const vramState = await api("GET", "/ai/vram");
+          return ok({
+            system: "Lakehouse Staffing Co-Pilot",
+            purpose: "AI anticipates staffing coordinator needs — pre-matches workers to contracts, surfaces alerts, builds playbooks from successful operations",
+            instructions: manual.slice(0, 3000), // capped at the first 3000 characters
+            datasets: datasetList.map((d: any) => ({ name: d.name, rows: d.row_count })),
+            indexes: indexList.map((i: any) => ({ name: i.index_name, chunks: i.chunk_count, backend: i.vector_backend })),
+            models: { qwen3: "8.2B reasoning+tools", qwen2_5: "7B fast SQL", mistral: "7B generation", nomic: "137M embedding" },
+            vram: vramState?.gpu,
+            tools: ["/search","/sql","/match","/worker/:id","/ask","/log","/playbooks","/profile/:id","/vram","/context","/verify"],
+            rules: [
+              "Never hallucinate — only state facts from tool responses",
+              "SQL for counts/aggregations, hybrid /search for matching",
+              "Log every successful operation to /log",
+              "Check /playbooks before complex tasks",
+              "Verify worker details via /worker/:id before communicating",
+            ],
+          });
+        }
+
+        // Verification endpoint — compares claimed worker fields with the stored ethereal_workers row.
+        // Body: { worker_id: 4925, checks: { role: "Forklift Operator", state: "IL", reliability: 0.82 } }
+        // Numeric claims match within ±0.05; all other claims are compared as case-insensitive text.
+        if (url.pathname === "/verify") {
+          const b = await json();
+          if (b?.worker_id == null || b.worker_id === "") return err("worker_id required");
+          // The id is spliced into SQL text below, so nothing but a plain integer may pass.
+          const workerId = /^-?\d+$/.test(String(b.worker_id).trim()) ? Number(b.worker_id) : NaN;
+          if (!Number.isSafeInteger(workerId)) return err("worker_id must be an integer");
+          const r = await api("POST", "/query/sql", { sql: `SELECT * FROM ethereal_workers WHERE worker_id = ${workerId}` });
+          const worker = r?.rows?.[0];
+          if (!worker) return ok({ verified: false, reason: `worker ${b.worker_id} not found` });
+
+          const failures: string[] = [];
+          for (const [field, expected] of Object.entries(b.checks || {})) {
+            const actual = worker[field];
+            if (actual === undefined) {
+              // A field the row does not have cannot be confirmed, so it must not count as verified.
+              failures.push(`${field}: claimed=${expected} actual=<no such field>`);
+            } else if (typeof expected === "number") {
+              // Negated `<=` so a non-numeric stored value (NaN) is reported instead of slipping through.
+              if (!(Math.abs(Number(actual) - expected) <= 0.05)) failures.push(`${field}: claimed=${expected} actual=${actual}`);
+            } else if (String(actual).toLowerCase() !== String(expected).toLowerCase()) {
+              failures.push(`${field}: claimed=${expected} actual=${actual}`);
+            }
+          }
+          return ok({
+            verified: failures.length === 0,
+            worker_id: b.worker_id,
+            worker_name: worker.name,
+            failures,
+            actual: worker,
+          });
+        }
 
         // Tool: hybrid search
         if (url.pathname === "/search") {