#!/usr/bin/env bash # materializer smoke — Go port of scripts/distillation/build_evidence_index.ts. # Validates that the materializer: # - Builds a minimal evidence partition from a synthetic source jsonl # - Skips bad-JSON rows into distillation_skips.jsonl # - Idempotently dedups identical rows on re-run (rows_deduped > 0) # - Honors --dry-run (no files written, exit 0) # - Emits a parseable receipt.json with validation_pass set -euo pipefail cd "$(dirname "$0")/.." export PATH="$PATH:/usr/local/go/bin" echo "[materializer-smoke] building bin/materializer..." go build -o bin/materializer ./cmd/materializer ROOT="$(mktemp -d)" trap 'rm -rf "$ROOT"' EXIT INT TERM mkdir -p "$ROOT/data/_kb" cat > "$ROOT/data/_kb/distilled_facts.jsonl" < "$ROOT/data/_kb/observer_escalations.jsonl" <&1 || true)" echo "$DRY_OUT" | grep -q "DRY RUN" || { echo "expected DRY RUN marker: $DRY_OUT"; exit 1; } [ ! -d "$ROOT/data/evidence" ] || { echo "dry-run wrote evidence dir"; exit 1; } echo "[materializer-smoke] first run" # Same exit-1 path as dry-run when bad-json present; expect that. ./bin/materializer -root "$ROOT" || true OUT_FACTS="$ROOT/data/evidence/$(date -u +'%Y/%m/%d')/distilled_facts.jsonl" OUT_OBS="$ROOT/data/evidence/$(date -u +'%Y/%m/%d')/observer_escalations.jsonl" SKIPS="$ROOT/data/_kb/distillation_skips.jsonl" [ -s "$OUT_FACTS" ] || { echo "expected $OUT_FACTS"; exit 1; } [ -s "$OUT_OBS" ] || { echo "expected $OUT_OBS"; exit 1; } [ -s "$SKIPS" ] || { echo "expected $SKIPS to capture bad-json row"; exit 1; } GOOD_ROWS=$(wc -l < "$OUT_FACTS") [ "$GOOD_ROWS" -eq 2 ] || { echo "expected 2 good rows in $OUT_FACTS, got $GOOD_ROWS"; exit 1; } # Receipt — find the most recent one and parse validation_pass. 
# --- Receipt check ----------------------------------------------------------
# Pick the most recently modified receipt.json under the reports tree and
# confirm it records validation_pass=false (the fixture has one bad-JSON row).
#
# The newest file is selected with bash's -nt test rather than
# `find | xargs ls -t | head -1`, because that pipeline misbehaves when there
# is nothing to find:
#   * with no matches, xargs (without -r) still runs `ls -t` once, which lists
#     the current directory and hands back an unrelated name as the "receipt";
#   * with no reports directory, find fails and `set -euo pipefail` aborts the
#     script before the "no receipt produced" diagnostic can be printed.
RECEIPT=""
if [[ -d "$ROOT/reports/distillation" ]]; then
  while IFS= read -r -d '' candidate; do
    if [[ -z "$RECEIPT" || "$candidate" -nt "$RECEIPT" ]]; then
      RECEIPT="$candidate"
    fi
  done < <(find "$ROOT/reports/distillation" -name 'receipt.json' -print0)
fi
[ -n "$RECEIPT" ] || { echo "no receipt produced"; exit 1; }

# Accept any amount of whitespace after the colon so the check does not depend
# on how the receipt happens to be pretty-printed.
grep -Eq '"validation_pass":[[:space:]]*false' "$RECEIPT" || {
  echo "expected validation_pass=false (1 row was bad JSON):"
  cat "$RECEIPT"
  exit 1
}

# --- Idempotent re-run ------------------------------------------------------
echo "[materializer-smoke] idempotent re-run"
# Rerun should fail validation again (the bad-JSON row is still there), hence
# the deliberate `|| true`, but successful rows should have hit dedup, not
# write. Output is captured in a variable (as the dry-run step does) instead of
# a fixed /tmp path, so concurrent runs cannot clobber each other and nothing
# is left behind outside $ROOT.
RERUN_OUT="$(./bin/materializer -root "$ROOT" 2>&1 || true)"
# Here-string rather than `echo | grep -q`: grep -q exits on first match, and
# under pipefail a SIGPIPE'd echo would turn a match into a failure.
grep -q "dedup=2" <<<"$RERUN_OUT" || {
  echo "expected dedup=2 on rerun, got:"
  printf '%s\n' "$RERUN_OUT"
  exit 1
}

echo "[materializer-smoke] PASS"