golangLAKEHOUSE/scripts/relevance_smoke.sh
root 9588bd82ae matrix: relevance filter — SPEC §3.4 component 3 of 5
Faithful port of mcp-server/relevance.ts (Rust observer's adjacency-
pollution filter). Same 5-signal scoring, same default threshold 0.3.
Adds POST /v1/matrix/relevance endpoint via matrixd.

Scoring signals (additive, can sign-flip):
  path_match     +1.0  chunk source/doc_id encodes focus.path
  filename_match +0.6  chunk text mentions focus's filename
  defined_match  +0.6  chunk text mentions focus.defined_symbols
  token_overlap  +0.4  jaccard of non-stopword tokens
  prefix_match   +0.3  chunk source shares first-2-segment prefix
  import_penalty -0.5  mentions ONLY imported symbols, no defined ones

What this does and doesn't do:
  - DOES filter code-aware corpora (eventually lakehouse_arch_v1,
    lakehouse_symbols_v1, scrum_findings_v1) — drops chunks about
    code the focus file IMPORTS rather than DEFINES, the
    "adjacency pollution" pattern that makes a reviewer LLM
    hallucinate imported-crate internals as belonging to the focus
  - DOES NOT meaningfully filter staffing data — the candidates
    reality test 2026-04-29 had "exact skill match buried at #3"
    which is a different problem (semantic-only ranking dominated
    by secondary text). Staffing needs structured filtering
    (status gates, location gates) that lives outside this
    package — future work, not in SPEC §3.4 yet

Headline smoke assertion: focus = crates/queryd/src/db.go which
defines Connector and imports catalogd::Registry. The filter
scores:
  Connector chunk: +0.68  (defined_match fires, kept)
  Registry chunk: -0.46  (import_only penalty fires, dropped)
  unrelated junk:  0.00  (no signals, dropped)

That's a 1.14-point gap between what we ARE and what we IMPORT —
the entire purpose of the filter.

Tests:
  - 9 unit tests in internal/matrix/relevance_test.go covering
    Tokenize, Jaccard, ExtractDefinedSymbols (Rust + TS),
    ExtractImportedSymbols, FilePrefix, ScoreRelevance per-signal,
    FilterChunks threshold splitting, and the headline
    AdjacencyPollutionScenario
  - scripts/relevance_smoke.sh integration smoke (3 assertions PASS):
    adjacency-pollution scenario, empty-chunks 400, threshold honored

13-smoke regression sweep all green (D1-D6, G1, G1P, G2,
storaged_cap, pathway, matrix, relevance).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 19:13:22 -05:00

157 lines
6.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Relevance smoke — code-relevance filter via matrixd /matrix/relevance.
# All assertions go through gateway :3110.
#
# Validates the headline adjacency-pollution scenario:
# Focus: crates/queryd/src/db.go which defines Connector.
# Chunk A is about Connector → kept (defined_match).
# Chunk B is about catalogd::Registry which db.go imports → outranked
# by Chunk A.
# Chunk C is unrelated → dropped (no signals fire).
#
# Plus negative paths:
# - Empty chunks → 400
# - Threshold honored when set explicitly
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Work from the parent of the directory holding this script, so the relative
# ./cmd/... and bin/ paths below resolve.
cd "$(dirname "$0")/.."

# Append the conventional Go install location as a fallback lookup path.
PATH="${PATH}:/usr/local/go/bin"
export PATH

echo "[relevance-smoke] building matrixd + vectord + gateway..."
build_targets=(./cmd/matrixd ./cmd/vectord ./cmd/gateway)
go build -o bin/ "${build_targets[@]}"

# Best effort: stop daemons left over from an earlier run. pkill exits non-zero
# when nothing matched, which must not trip errexit.
pkill -f "bin/(matrixd|vectord|gateway)" 2>/dev/null || :
sleep 0.3

# PIDs of the daemons started by this run (consumed by cleanup), plus a private
# scratch directory that holds the generated config file.
declare -a PIDS=()
TMP="$(mktemp -d)"
CFG="${TMP}/relevance.toml"
#######################################
# Tear down what this smoke run created: signal every recorded daemon and
# delete the scratch directory.
# Globals:
#   PIDS (read) - PIDs of background daemons started by this script
#   TMP  (read) - scratch directory created with mktemp -d
# Outputs:
#   Writes a one-line banner to stdout.
#######################################
cleanup() {
  echo "[relevance-smoke] cleanup"
  local pid
  # The ${PIDS[@]+...} form expands to nothing when the array is empty or
  # unset. A bare "${PIDS[@]}" on an empty array is an "unbound variable"
  # error under `set -u` on bash < 4.4, which would abort this handler before
  # the scratch directory is removed.
  for pid in ${PIDS[@]+"${PIDS[@]}"}; do
    if [ -n "$pid" ]; then
      # Best effort: the daemon may already have exited on its own.
      kill "$pid" 2>/dev/null || true
    fi
  done
  # Only remove a path that is actually set; never hand rm an empty operand.
  if [ -n "${TMP:-}" ]; then
    rm -rf -- "$TMP"
  fi
}
# Run cleanup exactly once, via the EXIT trap, on every exit path.
# INT and TERM are turned into a plain `exit` (conventional 128+signal status)
# so the script actually stops. If cleanup were bound to the signals directly,
# the handler would return and execution would resume with the daemons already
# killed, and cleanup would then run a second time on EXIT.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# Write the smoke-specific config shared by all three daemons (each is started
# with -config "$CFG"). vectord gets an empty storaged_url (persistence off);
# the relevance endpoint does not use vectord, but the [matrixd] section still
# carries a vectord_url because matrixd's config expects one.
# The delimiter is quoted so the body is written literally, with no expansion.
cat <<'EOF' > "$CFG"
[gateway]
bind = "127.0.0.1:3110"
storaged_url = "http://127.0.0.1:3211"
catalogd_url = "http://127.0.0.1:3212"
ingestd_url = "http://127.0.0.1:3213"
queryd_url = "http://127.0.0.1:3214"
vectord_url = "http://127.0.0.1:3215"
embedd_url = "http://127.0.0.1:3216"
pathwayd_url = "http://127.0.0.1:3217"
matrixd_url = "http://127.0.0.1:3218"
[vectord]
bind = "127.0.0.1:3215"
storaged_url = ""
[matrixd]
bind = "127.0.0.1:3218"
embedd_url = "http://127.0.0.1:3216"
vectord_url = "http://127.0.0.1:3215"
EOF
#######################################
# Poll http://127.0.0.1:<port>/health until a probe succeeds or time runs out.
# A probe "succeeds" when curl exits 0. -f is not used, so any HTTP response
# counts, whatever its status code.
# Arguments:
#   $1 - TCP port on 127.0.0.1 to probe
#   $2 - optional overall budget in whole seconds (default: 5)
# Returns:
#   0 as soon as one probe succeeds, 1 if the budget is exhausted first
#######################################
poll_health() {
  local port="$1" budget="${2:-5}"
  # Declared separately from the assignment so a failing `date` is not masked
  # by the exit status of `local`.
  local deadline
  deadline=$(( $(date +%s) + budget ))
  while [ "$(date +%s)" -lt "$deadline" ]; do
    # Each probe is capped at 1s so one hung connection cannot consume the
    # whole budget.
    if curl -sS --max-time 1 "http://127.0.0.1:$port/health" >/dev/null 2>&1; then
      return 0
    fi
    sleep 0.05
  done
  return 1
}
echo "[relevance-smoke] launching vectord → matrixd → gateway..."
# Start the daemons one at a time, in this order, each as "name:port". Every
# daemon logs to /tmp/<name>.log, has its PID recorded for cleanup, and must
# answer its health probe before the next one is started.
for svc_spec in vectord:3215 matrixd:3218 gateway:3110; do
  svc_name="${svc_spec%%:*}"
  svc_port="${svc_spec##*:}"
  svc_log="/tmp/${svc_name}.log"

  "./bin/${svc_name}" -config "$CFG" > "$svc_log" 2>&1 &
  PIDS+=("$!")

  if ! poll_health "$svc_port"; then
    # Show the tail of the daemon's log so the failure can be diagnosed.
    echo "${svc_name} failed"
    tail "$svc_log"
    exit 1
  fi
done
# Sticky failure flag: each assertion sets it to 1 on failure and the final
# verdict reads it, so one failing assertion does not hide the others.
FAILED=0
# ── 1. Adjacency-pollution scenario ──────────────────────────────
# The focus file defines Connector and imports catalogd::Registry. Expected:
# the Connector chunk is kept, the unrelated chunk is dropped, and Connector
# scores strictly higher than Registry, whichever bucket Registry lands in.
echo "[relevance-smoke] adjacency-pollution: Connector outranks Registry, junk dropped:"
PAYLOAD='{
"focus": {
"Path": "crates/queryd/src/db.go",
"Content": "pub struct Connector {}\npub fn open_connector() *Connector { return nil }\nuse catalogd::Registry;"
},
"chunks": [
{"source":"lakehouse_symbols_v1","doc_id":"symbol:queryd::struct::Connector","text":"Connector wraps the DuckDB handle. open_connector creates one.","score":0.9},
{"source":"lakehouse_symbols_v1","doc_id":"symbol:catalogd::struct::Registry","text":"Registry stores manifests. Used by ingestd.","score":0.85},
{"source":"lakehouse_symbols_v1","doc_id":"symbol:totally_other::Thing","text":"completely unrelated text about something else entirely","score":0.7}
],
"threshold": 0.3
}'
# `|| true`: a transport error must reach the ✗ branch below rather than abort
# the script through `set -e`. --max-time bounds a hung gateway.
RESP="$(curl -sS --max-time 10 -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD")" || true

# Evaluate jq filter $1 against $RESP. If jq fails (non-JSON body, unexpected
# shape, jq missing), print fallback $2 and succeed, so `set -e` cannot skip
# the diagnostics below.
relevance_jq() {
  jq -r "$1" <<<"$RESP" || printf '%s\n' "$2"
}

# `[]?` yields nothing instead of erroring when .kept/.dropped is absent or
# null, e.g. when the gateway answered with an error object.
# Connector chunk should be in kept
CONNECTOR_KEPT="$(relevance_jq '[.kept[]? | select(.doc_id | contains("Connector"))] | length' 'jq-error')"
# The unrelated junk chunk should be in dropped
JUNK_DROPPED="$(relevance_jq '[.dropped[]? | select(.doc_id | contains("Thing"))] | length' 'jq-error')"
# Connector should outrank Registry (whichever bucket they end up in).
# -999 is the "not found" sentinel, so a missing chunk can never win.
CONN_REL="$(relevance_jq '[.kept[]?, .dropped[]? | select(.doc_id | contains("Connector"))] | .[0].relevance // -999' '-999')"
REG_REL="$(relevance_jq '[.kept[]?, .dropped[]? | select(.doc_id | contains("Registry"))] | .[0].relevance // -999' '-999')"
TOTAL_IN="$(relevance_jq '.total_in' 'jq-error')"
# Relevance values are floats, so compare them in awk rather than with `[ -gt ]`.
CONN_OUTRANKS_REG="$(awk -v a="$CONN_REL" -v b="$REG_REL" 'BEGIN{print (a>b)?"true":"false"}')"
if [ "$CONNECTOR_KEPT" = "1" ] && [ "$JUNK_DROPPED" = "1" ] && [ "$CONN_OUTRANKS_REG" = "true" ] && [ "$TOTAL_IN" = "3" ]; then
  echo " ✓ Connector kept, junk dropped, Connector ($CONN_REL) > Registry ($REG_REL)"
else
  echo " ✗ kept_connector=$CONNECTOR_KEPT dropped_junk=$JUNK_DROPPED conn=$CONN_REL reg=$REG_REL total=$TOTAL_IN"
  echo " full: $RESP"
  FAILED=1
fi
# ── 2. Empty chunks → 400 ────────────────────────────────────────
# Negative path: a request with an empty "chunks" array must be rejected.
echo "[relevance-smoke] empty chunks → 400:"
# -o /dev/null with -w '%{http_code}' captures only the status code.
# `|| true`: if curl itself fails, the (non-400) value it printed still reaches
# the ✗ branch, rather than `set -e` aborting the script at this assignment.
# --max-time bounds a hung gateway.
HTTP="$(curl -sS --max-time 10 -o /dev/null -w '%{http_code}' -X POST http://127.0.0.1:3110/v1/matrix/relevance \
  -H 'Content-Type: application/json' \
  -d '{"focus":{"Path":"x"},"chunks":[]}')" || true
if [ "$HTTP" = "400" ]; then
  echo " ✓ 400 on empty chunks"
else
  echo " ✗ got $HTTP"; FAILED=1
fi
# ── 3. Threshold honored ─────────────────────────────────────────
# An explicit threshold of 10 is sent with a single chunk whose text mentions
# the focus's defined symbol; the assertion is that it is still dropped.
echo "[relevance-smoke] threshold=10 (impossibly high) drops everything:"
PAYLOAD2='{
"focus": {"Path": "x.go", "Content": "pub fn known() {}", "DefinedSymbols": ["known"]},
"chunks": [
{"source":"s","doc_id":"d1","text":"known appears here","score":0.9}
],
"threshold": 10
}'
# `|| true` / `|| echo`: a transport error or a body jq cannot parse must reach
# the ✗ branch below rather than abort the script through `set -e`.
# --max-time bounds a hung gateway.
RESP2="$(curl -sS --max-time 10 -X POST http://127.0.0.1:3110/v1/matrix/relevance -H 'Content-Type: application/json' -d "$PAYLOAD2")" || true
KEPT_COUNT="$(jq -r '.kept | length' <<<"$RESP2" || echo "jq-error")"
DROP_COUNT="$(jq -r '.dropped | length' <<<"$RESP2" || echo "jq-error")"
if [ "$KEPT_COUNT" = "0" ] && [ "$DROP_COUNT" = "1" ]; then
  echo " ✓ threshold=10 drops everything (0 kept / 1 dropped)"
else
  echo " ✗ kept=$KEPT_COUNT dropped=$DROP_COUNT"
  # Show the raw body, as assertion 1 does, so the failure can be diagnosed.
  echo " full: $RESP2"
  FAILED=1
fi
# Final verdict: any assertion that set FAILED makes the whole gate fail with
# exit status 1; otherwise report success and exit 0.
[ "$FAILED" -eq 0 ] || {
  echo "[relevance-smoke] Relevance acceptance gate: FAILED"
  exit 1
}
echo "[relevance-smoke] Relevance acceptance gate: PASSED"
exit 0