Some checks failed
lakehouse/auditor 1 blocking issue: todo!() macro call in tests/real-world/scrum_master_pipeline.ts
Matrix retrieval often surfaces high-cosine chunks that are about
symbols the focus file IMPORTS but doesn't define. The reviewer LLM
then hallucinates those imported-crate internals as in-file content
("I see main.rs does X" when X lives in queryd::context).
mcp-server/relevance.ts — pure scorer with five signals:
  path_match     +1.0  chunk source/doc_id encodes the focus path
  defined_match  +0.6  chunk text mentions focus.defined_symbols
  token_overlap  +0.4  Jaccard of non-stopword tokens
  prefix_match   +0.3  shared first-2-segment path prefix
  import_only    -0.5  chunk mentions only imported symbols (pollution)
Default threshold 0.3 — tuned empirically on the gateway/main.rs case.
Also fixes a regex bug in the import extractor: the character class
was lowercase-only, so `use catalogd::Registry;` silently never
matched (regex backed off when it hit the uppercase R). Caught by
the test suite.
observer.ts — the POST /relevance endpoint wraps filterChunks().
scrum_master_pipeline.ts — fetchMatrixContext gains an optional
focusContent param; it calls /relevance after collecting allHits and
before the sort+top step. Opt out via LH_RELEVANCE_FILTER=0; set the
threshold via LH_RELEVANCE_THRESHOLD. Fails open on observer failure.
9 unit tests, all green. Live probe on real shape correctly drops
a 0.7-cosine adjacency-pollution chunk while keeping in-focus hits.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
130 lines
5.3 KiB
TypeScript
130 lines
5.3 KiB
TypeScript
import { test, expect } from "bun:test";
|
|
import {
|
|
scoreRelevance,
|
|
filterChunks,
|
|
extractDefinedSymbols,
|
|
extractImportedSymbols,
|
|
jaccard,
|
|
tokenize,
|
|
} from "./relevance";
|
|
|
|
// Shared fixture: Rust source text used as the "focus file" content by the
// tests below. It contains three `use` imports (build_context, Registry, and
// the braced group Tombstone / ModelProfile), one struct (GatewayState), and
// two `pub` functions (handle_query, shutdown).
// NOTE(review): the lone `|` lines inside the literal look like extraction
// residue rather than intended fixture content — confirm against the original file.
const RUST_FOCUS = `
|
|
use queryd::context::build_context;
|
|
use catalogd::Registry;
|
|
use shared::types::{Tombstone, ModelProfile};
|
|
|
|
pub struct GatewayState {
|
|
catalog: Registry,
|
|
}
|
|
|
|
pub async fn handle_query(state: &GatewayState, sql: &str) -> Result<QueryResponse, Error> {
|
|
let ctx = build_context(&state.catalog).await?;
|
|
ctx.sql(sql).await.map(QueryResponse::from)
|
|
}
|
|
|
|
pub fn shutdown(state: GatewayState) {
|
|
drop(state);
|
|
}
|
|
`;
|
|
|
|
// The fixture declares two `pub` functions and one struct; each of those
// names must be reported as defined by the focus file.
test("extractDefinedSymbols pulls pub fn / struct names", () => {
  const defined = extractDefinedSymbols(RUST_FOCUS);
  for (const name of ["handle_query", "shutdown", "GatewayState"]) {
    expect(defined).toContain(name);
  }
});
|
|
|
|
// Import extraction must report every leaf name from the fixture's `use`
// lines (including both names in the braced group) and no keywords.
test("extractImportedSymbols pulls names from use statements", () => {
  const imported = extractImportedSymbols(RUST_FOCUS);

  const expectedNames = ["build_context", "Registry", "Tombstone", "ModelProfile"];
  for (const name of expectedNames) {
    expect(imported).toContain(name);
  }

  // Keywords that appear in `use` paths are not symbols.
  for (const keyword of ["use", "crate"]) {
    expect(imported).not.toContain(keyword);
  }
});
|
|
|
|
// A chunk whose doc_id embeds the focus path should reach a score of at
// least 1.0 and report "path_match", even though its text is unrelated.
test("path_match dominates when chunk encodes focus path", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const hit = {
    source: "distilled_factual_v20260423095819",
    doc_id: "crates/gateway/src/main.rs:42",
    text: "Some chunk content unrelated to anything",
    score: 0.5,
  };

  const verdict = scoreRelevance(focusFile, hit);

  expect(verdict.score).toBeGreaterThanOrEqual(1.0);
  expect(verdict.reasons).toContain("path_match");
});
|
|
|
|
// Adjacency pollution: the chunk text names symbols the focus file imports
// (build_context, Registry, Tombstone) but none that it defines. It must be
// tagged import_only and score under the 0.3 cut-off so it gets dropped.
test("import_only adjacency pollution gets penalized", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const pollutedHit = {
    source: "distilled_procedural_v20260423102847",
    doc_id: "proc_8421",
    text: "When build_context fails the Registry must be invalidated. The Tombstone fields drive the merge-on-read filter — caller should not retry on stale fingerprints.",
    score: 0.65,
  };

  const verdict = scoreRelevance(focusFile, pollutedHit);

  const taggedImportOnly = verdict.reasons.some((reason) => reason.startsWith("import_only("));
  expect(taggedImportOnly).toBe(true);
  expect(verdict.score).toBeLessThan(0.3);
});
|
|
|
|
// A chunk that names symbols the focus file defines (handle_query,
// GatewayState, shutdown) must carry a defined_match reason and score above
// the 0.3 cut-off so it is kept.
test("defined_match keeps a chunk that's actually about the focus", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const onTopicHit = {
    source: "distilled_factual_v20260423095819",
    doc_id: "fact_12",
    text: "handle_query in GatewayState must return QueryResponse, not anyhow::Error. The shutdown path drops state synchronously.",
    score: 0.4,
  };

  const verdict = scoreRelevance(focusFile, onTopicHit);

  const taggedDefined = verdict.reasons.some((reason) => reason.startsWith("defined_match"));
  expect(taggedDefined).toBe(true);
  expect(verdict.score).toBeGreaterThan(0.3);
});
|
|
|
|
// Four candidates, one per scenario: a path match (keep), import-only
// pollution (drop), a defined-symbol match (keep), and unrelated text (drop).
test("filterChunks bucket-sorts kept vs dropped", () => {
  const focusFile = { path: "crates/gateway/src/main.rs", content: RUST_FOCUS };
  const candidates = [
    // doc_id encodes the focus path
    { source: "x", doc_id: "crates/gateway/src/main.rs:1", text: "anything", score: 0.5 },
    // only imported symbols are mentioned
    { source: "x", doc_id: "y", text: "build_context Tombstone Registry adjacent only", score: 0.7 },
    // focus-defined symbols are mentioned
    { source: "x", doc_id: "z", text: "handle_query and GatewayState are at fault here", score: 0.4 },
    // no connection to the focus file
    { source: "x", doc_id: "w", text: "completely unrelated content about chicago permits", score: 0.6 },
  ];

  const outcome = filterChunks(focusFile, candidates);

  expect(outcome.kept.length).toBe(2);
  expect(outcome.dropped.length).toBe(2);

  const keptIds = outcome.kept.map((chunk) => chunk.doc_id);
  expect(keptIds).toContain("crates/gateway/src/main.rs:1");
  expect(keptIds).toContain("z");
});
|
|
|
|
// The same weakly-related chunk is filtered twice: a 0.95 threshold must
// reject it and a 0.1 threshold must keep it.
test("threshold override changes filter behavior", () => {
  const focusFile = { path: "crates/queryd/src/x.rs", content: "pub fn foo() {}" };
  const weakHit = { source: "x", doc_id: "y", text: "foo is referenced here briefly", score: 0.2 };

  const strictOutcome = filterChunks(focusFile, [weakHit], 0.95);
  const looseOutcome = filterChunks(focusFile, [weakHit], 0.1);

  expect(strictOutcome.kept.length).toBe(0);
  expect(looseOutcome.kept.length).toBe(1);
});
|
|
|
|
// Plain-prose focus content: the score must still be positive and a
// token_overlap reason must be reported.
test("empty defined/imported gracefully scores by tokens only", () => {
  const proseFocus = { path: "doc.md", content: "This is plain prose about welders in Chicago." };
  const proseHit = { source: "x", doc_id: "y", text: "Welders working in Chicago need OSHA certs.", score: 0.5 };

  const verdict = scoreRelevance(proseFocus, proseHit);

  expect(verdict.score).toBeGreaterThan(0);
  const hasTokenOverlap = verdict.reasons.some((reason) => reason.startsWith("token_overlap"));
  expect(hasTokenOverlap).toBe(true);
});
|
|
|
|
// Sanity check on the primitives: tokenize drops the stopword "the" but keeps
// "brown", and two partly-overlapping sentences give a Jaccard value strictly
// between 0 and 1.
test("jaccard / tokenize basic sanity", () => {
  const foxTokens = tokenize("the quick brown fox jumps over the lazy dog");
  const wolfTokens = tokenize("a fast brown wolf runs over a tired dog");

  expect(foxTokens.has("the")).toBe(false);
  expect(foxTokens.has("brown")).toBe(true);

  const similarity = jaccard(foxTokens, wolfTokens);
  expect(similarity).toBeGreaterThan(0);
  expect(similarity).toBeLessThan(1);
});
|