lakehouse/mcp-server/relevance.test.ts
root 0115a60072
Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
observer: add /relevance heuristic filter for adjacency pollution
Matrix retrieval often surfaces high-cosine chunks that are about
symbols the focus file IMPORTS but doesn't define. The reviewer LLM
then hallucinates those imported-crate internals as in-file content
("I see main.rs does X" when X lives in queryd::context).

mcp-server/relevance.ts — pure scorer with five signals:
  path_match      +1.0  chunk source/doc_id encodes focus path
  defined_match   +0.6  chunk text mentions focus.defined_symbols
  token_overlap   +0.4  jaccard of non-stopword tokens
  prefix_match    +0.3  shared first-2-segment prefix
  import_only     -0.5  mentions only imported symbols (pollution)

Default threshold 0.3 — tuned empirically on the gateway/main.rs case.

Also fixes a regex bug in the import extractor: the character class
was lowercase-only, so `use catalogd::Registry;` silently never
matched (regex backed off when it hit the uppercase R). Caught by
the test suite.

observer.ts — POST /relevance endpoint wraps filterChunks().
scrum_master_pipeline.ts — fetchMatrixContext gains optional
focusContent param; calls /relevance after collecting allHits and
before sort+top. Opt-out via LH_RELEVANCE_FILTER=0; threshold via
LH_RELEVANCE_THRESHOLD. Fails open on observer failure.

9 unit tests, all green. Live probe on real shape correctly drops
a 0.7-cosine adjacency-pollution chunk while keeping in-focus hits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 23:51:45 -05:00

130 lines
5.3 KiB
TypeScript

import { test, expect } from "bun:test";
import {
scoreRelevance,
filterChunks,
extractDefinedSymbols,
extractImportedSymbols,
jaccard,
tokenize,
} from "./relevance";
/**
 * Rust source used as the "focus file" content throughout this suite.
 *
 * It imports `build_context`, `Registry`, `Tombstone` and `ModelProfile`
 * through `use` statements and defines `GatewayState`, `handle_query` and
 * `shutdown`. The string starts and ends with a newline (the empty first
 * and last entries below).
 */
const RUST_FOCUS = [
  "",
  "use queryd::context::build_context;",
  "use catalogd::Registry;",
  "use shared::types::{Tombstone, ModelProfile};",
  "pub struct GatewayState {",
  "catalog: Registry,",
  "}",
  "pub async fn handle_query(state: &GatewayState, sql: &str) -> Result<QueryResponse, Error> {",
  "let ctx = build_context(&state.catalog).await?;",
  "ctx.sql(sql).await.map(QueryResponse::from)",
  "}",
  "pub fn shutdown(state: GatewayState) {",
  "drop(state);",
  "}",
  "",
].join("\n");
// The fixture declares one `pub struct` and two `pub fn`s; every one of those
// names must show up in the extractor's output.
test("extractDefinedSymbols pulls pub fn / struct names", () => {
  const defined = extractDefinedSymbols(RUST_FOCUS);

  for (const name of ["handle_query", "shutdown", "GatewayState"]) {
    expect(defined).toContain(name);
  }
});
// Checks the leaf names of the fixture's three `use` lines, including both
// members of the brace-grouped import, and that bare keywords are excluded.
test("extractImportedSymbols pulls names from use statements", () => {
  const imported = extractImportedSymbols(RUST_FOCUS);

  const expectedNames = ["build_context", "Registry", "Tombstone", "ModelProfile"];
  for (const name of expectedNames) {
    expect(imported).toContain(name);
  }

  // Should not include keywords
  for (const keyword of ["use", "crate"]) {
    expect(imported).not.toContain(keyword);
  }
});
// The chunk's doc_id is the focus path plus a ":42" suffix while its text is
// unrelated, so any score it earns has to come from the path signal.
test("path_match dominates when chunk encodes focus path", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const hit = {
    source: "distilled_factual_v20260423095819",
    doc_id: "crates/gateway/src/main.rs:42",
    text: "Some chunk content unrelated to anything",
    score: 0.5,
  };

  const verdict = scoreRelevance(focusFile, hit);

  expect(verdict.score).toBeGreaterThanOrEqual(1.0);
  expect(verdict.reasons).toContain("path_match");
});
// Classic adjacency pollution: the chunk names only symbols the focus file
// imports (build_context, Registry, Tombstone) and none that it defines.
test("import_only adjacency pollution gets penalized", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const pollutedHit = {
    source: "distilled_procedural_v20260423102847",
    doc_id: "proc_8421",
    text: "When build_context fails the Registry must be invalidated. The Tombstone fields drive the merge-on-read filter — caller should not retry on stale fingerprints.",
    score: 0.65,
  };

  const verdict = scoreRelevance(focusFile, pollutedHit);

  const penalized = verdict.reasons.some((reason) => reason.startsWith("import_only("));
  expect(penalized).toBe(true);
  // 0.3 is the default threshold, so a score under it means the chunk is dropped.
  expect(verdict.score).toBeLessThan(0.3);
});
// The chunk text names symbols the fixture itself defines (handle_query,
// GatewayState, shutdown), so it should be credited and kept.
test("defined_match keeps a chunk that's actually about the focus", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const onTopicHit = {
    source: "distilled_factual_v20260423095819",
    doc_id: "fact_12",
    text: "handle_query in GatewayState must return QueryResponse, not anyhow::Error. The shutdown path drops state synchronously.",
    score: 0.4,
  };

  const verdict = scoreRelevance(focusFile, onTopicHit);

  const credited = verdict.reasons.some((reason) => reason.startsWith("defined_match"));
  expect(credited).toBe(true);
  // Above the 0.3 default threshold, so the chunk is kept.
  expect(verdict.score).toBeGreaterThan(0.3);
});
// Four chunks, one per expected outcome; at the default threshold two should
// land in `kept` and two in `dropped`.
test("filterChunks bucket-sorts kept vs dropped", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const candidates = [
    // path_match — kept
    { source: "x", doc_id: "crates/gateway/src/main.rs:1", text: "anything", score: 0.5 },
    // import_only — dropped
    { source: "x", doc_id: "y", text: "build_context Tombstone Registry adjacent only", score: 0.7 },
    // defined_match — kept
    { source: "x", doc_id: "z", text: "handle_query and GatewayState are at fault here", score: 0.4 },
    // nothing — dropped
    { source: "x", doc_id: "w", text: "completely unrelated content about chicago permits", score: 0.6 },
  ];

  const { kept, dropped } = filterChunks(focusFile, candidates);

  expect(kept.length).toBe(2);
  expect(dropped.length).toBe(2);

  const keptIds = kept.map((chunk) => chunk.doc_id);
  expect(keptIds).toContain("crates/gateway/src/main.rs:1");
  expect(keptIds).toContain("z");
});
// The same single chunk is filtered twice: a 0.95 threshold must reject it
// and a 0.1 threshold must accept it.
test("threshold override changes filter behavior", () => {
  const focusFile = { path: "crates/queryd/src/x.rs", content: "pub fn foo() {}" };
  const weakHit = { source: "x", doc_id: "y", text: "foo is referenced here briefly", score: 0.2 };

  const strictRun = filterChunks(focusFile, [weakHit], 0.95);
  const looseRun = filterChunks(focusFile, [weakHit], 0.1);

  expect(strictRun.kept.length).toBe(0);
  expect(looseRun.kept.length).toBe(1);
});
// The focus content is plain prose with no `pub fn` / `struct` / `use` lines,
// so a positive score is expected to be explained by token overlap.
test("empty defined/imported gracefully scores by tokens only", () => {
  const proseFocus = { path: "doc.md", content: "This is plain prose about welders in Chicago." };
  const proseHit = { source: "x", doc_id: "y", text: "Welders working in Chicago need OSHA certs.", score: 0.5 };

  const verdict = scoreRelevance(proseFocus, proseHit);

  expect(verdict.score).toBeGreaterThan(0);
  const hasOverlapReason = verdict.reasons.some((reason) => reason.startsWith("token_overlap"));
  expect(hasOverlapReason).toBe(true);
});
// Two sentences that share some words but not all: the stopword "the" must be
// filtered out, and the similarity must fall strictly between 0 and 1.
test("jaccard / tokenize basic sanity", () => {
  const foxTokens = tokenize("the quick brown fox jumps over the lazy dog");
  const wolfTokens = tokenize("a fast brown wolf runs over a tired dog");

  expect(foxTokens.has("the")).toBe(false); // stopword
  expect(foxTokens.has("brown")).toBe(true);

  const similarity = jaccard(foxTokens, wolfTokens);
  expect(similarity).toBeGreaterThan(0);
  expect(similarity).toBeLessThan(1);
});