package replay

import (
	"context"
	"encoding/json"
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// ─── Tokenization + retrieval primitives ───────────────────────────

// TestTokenize_FiltersShortAndLowercase feeds tokenize a mixed-case,
// punctuated sentence and expects every multi-character word back in
// lowercase, while the single-character word "a" must be absent.
func TestTokenize_FiltersShortAndLowercase(t *testing.T) {
	tokens := tokenize("Hello, World! Foo BAR baz x12 a")

	for _, word := range []string{"hello", "world", "foo", "bar", "baz", "x12"} {
		if _, found := tokens[word]; found {
			continue
		}
		t.Errorf("missing token %q", word)
	}

	if _, found := tokens["a"]; found {
		t.Errorf("len=1 token should be filtered: a")
	}
}

// TestJaccard_EdgeCases checks jaccard on two three-element sets that
// share two members, and on an empty set paired with a non-empty one.
func TestJaccard_EdgeCases(t *testing.T) {
	left := map[string]struct{}{"x": {}, "y": {}, "z": {}}
	right := map[string]struct{}{"y": {}, "z": {}, "w": {}}

	// The sets share {y, z} (2 members) out of {x, y, z, w} (4 members).
	want := 2.0 / 4.0
	if got := jaccard(left, right); got != want {
		t.Errorf("jaccard = %v, want %v", got, want)
	}

	empty := map[string]struct{}{}
	if jaccard(empty, right) != 0 {
		t.Error("empty set should produce 0")
	}
}

// ─── Retrieval ───────────────────────────────────────────────────

// TestRetrieveRag_ScoresAndCaps queries a three-sample corpus with a
// build-verification task and expects a non-empty result that does not
// contain the unrelated cooking sample "p2".
func TestRetrieveRag_ScoresAndCaps(t *testing.T) {
	corpus := []RagSample{
		{
			ID:           "p1",
			Title:        "validate scrum",
			Content:      "verify the build, check tests",
			Tags:         []string{"scrum"},
			SuccessScore: "accepted",
		},
		{
			ID:           "p2",
			Title:        "irrelevant cooking notes",
			Content:      "boil pasta longer than ten minutes",
			Tags:         []string{"food"},
			SuccessScore: "accepted",
		},
		{
			ID:           "p3",
			Title:        "build verification ladder",
			Content:      "verify build steps, assert green",
			Tags:         []string{"build"},
			SuccessScore: "partially_accepted",
		},
	}

	hits := retrieveRag(corpus, "verify the build assert green", 3)
	if len(hits) == 0 {
		t.Fatal("expected at least one result")
	}
	for _, hit := range hits {
		if hit.RagID != "p2" {
			continue
		}
		t.Errorf("irrelevant sample p2 should not surface, got: %+v", hits)
	}
}

// TestBuildContextBundle_SplitsAcceptedAndPartial builds a bundle from
// one "accepted" and one "partially_accepted" sample. It expects the
// former alone in PriorSuccessfulOutputs, the latter alone in
// FailurePatterns, and at least one entry in ValidationSteps.
func TestBuildContextBundle_SplitsAcceptedAndPartial(t *testing.T) {
	corpus := []RagSample{
		{
			ID:           "a1",
			Title:        "A1",
			Content:      "verify build assert green check tests",
			SuccessScore: "accepted",
		},
		{
			ID:           "p1",
			Title:        "P1",
			Content:      "verify build sometimes fails to assert",
			SuccessScore: "partially_accepted",
		},
	}

	bundle := BuildContextBundle(corpus, "verify build assert tests")
	if bundle == nil {
		t.Fatal("nil bundle")
	}

	accepted := bundle.PriorSuccessfulOutputs
	if !(len(accepted) == 1 && accepted[0].RagID == "a1") {
		t.Errorf("accepted bucket wrong: %+v", bundle.PriorSuccessfulOutputs)
	}

	partial := bundle.FailurePatterns
	if !(len(partial) == 1 && partial[0].RagID == "p1") {
		t.Errorf("partially_accepted bucket wrong: %+v", bundle.FailurePatterns)
	}

	if len(bundle.ValidationSteps) == 0 {
		t.Errorf("expected validation_steps from accepted sample, got none")
	}
}

// ─── Prompt assembly ─────────────────────────────────────────────

// TestBuildPrompt_NoBundleIsCompact expects a prompt built with a nil
// bundle to carry the task line but not the "Prior successful runs"
// header.
func TestBuildPrompt_NoBundleIsCompact(t *testing.T) {
	prompt := BuildPrompt("rebuild evidence index", nil)

	if !strings.Contains(prompt.User, "Task: rebuild evidence index") {
		t.Errorf("user prompt missing task: %q", prompt.User)
	}
	if strings.Contains(prompt.User, "## Prior successful runs") {
		t.Error("no-bundle prompt should not include retrieval headers")
	}
}

// TestBuildPrompt_WithBundleIncludesAllSections expects a prompt built
// from a fully populated bundle to contain each section header as well
// as the task text itself.
func TestBuildPrompt_WithBundleIncludesAllSections(t *testing.T) {
	successes := []RetrievedArtifact{
		{RagID: "a1", Title: "A1", ContentPreview: "verified", SuccessScore: "accepted"},
	}
	failures := []RetrievedArtifact{
		{RagID: "p1", Title: "P1", ContentPreview: "partial result", SuccessScore: "partially_accepted"},
	}
	bundle := &ContextBundle{
		PriorSuccessfulOutputs: successes,
		FailurePatterns:        failures,
		ValidationSteps:        []string{"verify the build"},
	}

	prompt := BuildPrompt("task X", bundle)

	markers := []string{
		"## Prior successful runs",
		"## Patterns that produced PARTIAL results",
		"## Validation checklist",
		"## Task",
		"task X",
	}
	for _, marker := range markers {
		if strings.Contains(prompt.User, marker) {
			continue
		}
		t.Errorf("user prompt missing marker %q in:\n%s", marker, prompt.User)
	}
}

// ─── Validation gate ─────────────────────────────────────────────

// TestValidateResponse_FailsOnEmptyAndShort expects ValidateResponse to
// reject both an empty response and a nine-character one when no bundle
// is supplied.
func TestValidateResponse_FailsOnEmptyAndShort(t *testing.T) {
	cases := []struct {
		response  string
		complaint string
	}{
		{response: "", complaint: "empty should fail"},
		{response: "too short", complaint: "too-short should fail"},
	}

	for _, tc := range cases {
		if ValidateResponse(tc.response, nil).Passed {
			t.Error(tc.complaint)
		}
	}
}

// TestValidateResponse_FailsOnFiller expects a long response that ends
// with the hedge phrase " As an AI, I cannot help." to be rejected.
func TestValidateResponse_FailsOnFiller(t *testing.T) {
	padding := strings.Repeat("This is a real long response that meets the eighty character minimum for the gate. ", 2)
	resp := padding + " As an AI, I cannot help."

	verdict := ValidateResponse(resp, nil)
	if verdict.Passed {
		t.Errorf("response with hedge phrase should fail, reasons=%v", verdict.Reasons)
	}
}

// TestValidateResponse_PassesWhenChecklistOverlaps expects a response
// that reuses words from the bundle's single validation step to pass.
func TestValidateResponse_PassesWhenChecklistOverlaps(t *testing.T) {
	bundle := &ContextBundle{
		ValidationSteps: []string{"verify the build is green"},
	}
	resp := "I followed the procedure and verified that the build is green and tests passed before merging the change."

	verdict := ValidateResponse(resp, bundle)
	if verdict.Passed {
		return
	}
	t.Errorf("expected pass, got reasons=%v", verdict.Reasons)
}

// TestValidateResponse_FailsWhenChecklistOrthogonal expects a response
// sharing no checklist vocabulary with the bundle's validation step to
// be rejected.
func TestValidateResponse_FailsWhenChecklistOrthogonal(t *testing.T) {
	bundle := &ContextBundle{
		ValidationSteps: []string{"verify mango ripeness"},
	}
	resp := "I followed completely unrelated steps about Quantum Tax compliance — I did not look at any fruit at all and that's the point."

	if verdict := ValidateResponse(resp, bundle); verdict.Passed {
		t.Errorf("expected fail because no checklist token overlap, got pass")
	}
}

// ─── End-to-end (dry-run, no LLM) ────────────────────────────────

// TestReplay_DryRun_LogsResult runs Replay in dry-run mode against a
// one-sample fixture. It checks the returned run ID and context bundle,
// then reads data/_kb/replay_runs.jsonl under the temp root and verifies
// that its content parses as a JSON object whose "schema" field is
// "replay_run.v1".
func TestReplay_DryRun_LogsResult(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{
			ID:           "p1",
			Title:        "build verification",
			Content:      "verify the build, check tests pass before merge",
			Tags:         []string{"scrum"},
			SuccessScore: "accepted",
			SourceRunID:  "r-1",
		},
	})

	req := ReplayRequest{
		Task:   "verify the build before merging",
		DryRun: true,
	}
	res, err := Replay(context.Background(), req, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}

	if res.RecordedRunID == "" {
		t.Error("expected recorded_run_id")
	}
	if !strings.HasPrefix(res.RecordedRunID, "replay:") {
		t.Errorf("run_id shape: %s", res.RecordedRunID)
	}
	if res.ContextBundle == nil {
		t.Fatal("expected retrieval to fire by default")
	}
	if len(res.ContextBundle.RetrievedPlaybooks) == 0 {
		t.Errorf("expected at least one retrieved playbook")
	}

	raw, err := os.ReadFile(filepath.Join(root, "data/_kb/replay_runs.jsonl"))
	if err != nil {
		t.Fatalf("read log: %v", err)
	}

	// The whole trimmed file is decoded as a single JSON object.
	trimmed := strings.TrimSpace(string(raw))
	var row map[string]any
	if err := json.Unmarshal([]byte(trimmed), &row); err != nil {
		t.Fatalf("parse log row: %v", err)
	}
	if schema := row["schema"]; schema != "replay_run.v1" {
		t.Errorf("schema field: %v", row["schema"])
	}
}

// TestReplay_NoRetrievalSkipsCorpus expects a NoRetrieval request to
// come back with a nil context bundle and no rag IDs, even though the
// fixture contains a sample whose content equals the task.
func TestReplay_NoRetrievalSkipsCorpus(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{ID: "p1", Title: "would match", Content: "verify build assert", SuccessScore: "accepted"},
	})

	req := ReplayRequest{
		Task:        "verify build assert",
		DryRun:      true,
		NoRetrieval: true,
	}
	res, err := Replay(context.Background(), req, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}

	if res.ContextBundle != nil {
		t.Errorf("expected nil bundle in NoRetrieval mode")
	}
	if ids := res.RetrievedArtifacts.RagIDs; len(ids) != 0 {
		t.Errorf("expected empty rag_ids, got %v", res.RetrievedArtifacts.RagIDs)
	}
}

// TestReplay_EscalationFiresOnFailedValidation expects a dry-run whose
// validation fails to produce an escalation path of at least two steps
// and a response containing the "[ESCALATED]" marker.
func TestReplay_EscalationFiresOnFailedValidation(t *testing.T) {
	root := t.TempDir()

	// Trick: the dry-run synthesizer copies validation_steps verbatim
	// into its output. Seeding the fixture with a step that contains a
	// hedge phrase therefore puts that phrase into the synthesized
	// response, which trips the filler-pattern guard in
	// ValidateResponse and forces escalation.
	mustWriteRagFixture(t, root, []RagSample{
		{
			ID:           "p1",
			Title:        "demo step",
			Content:      "verify the build then i cannot proceed without approval",
			SuccessScore: "accepted",
		},
	})

	req := ReplayRequest{
		Task:            "verify the build then proceed",
		DryRun:          true,
		AllowEscalation: true,
	}
	res, err := Replay(context.Background(), req, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}

	if len(res.EscalationPath) < 2 {
		t.Errorf("expected escalation, path=%v reasons=%v", res.EscalationPath, res.ValidationResult.Reasons)
	}
	if !strings.Contains(res.ModelResponse, "[ESCALATED]") {
		t.Errorf("expected escalated marker in response, got: %q", res.ModelResponse)
	}
}

// TestReplay_NoEscalationWhenValidationPasses expects that, with
// escalation allowed but validation passing, the escalation path holds
// exactly one step.
func TestReplay_NoEscalationWhenValidationPasses(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{
			ID:           "p1",
			Title:        "build verification",
			Content:      "verify the build, check tests pass before merge",
			Tags:         []string{"scrum"},
			SuccessScore: "accepted",
			SourceRunID:  "r-1",
		},
	})

	req := ReplayRequest{
		Task:            "verify the build before merging",
		DryRun:          true,
		AllowEscalation: true,
	}
	res, err := Replay(context.Background(), req, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}

	if len(res.EscalationPath) != 1 {
		t.Errorf("expected single-step path on validation pass, got %v", res.EscalationPath)
	}
	if !res.ValidationResult.Passed {
		t.Errorf("expected pass, got reasons=%v", res.ValidationResult.Reasons)
	}
}

// ─── Helpers ────────────────────────────────────────────────────

// mustWriteRagFixture serializes samples as JSON Lines (one object per
// line) into exports/rag/playbooks.jsonl beneath root, creating the
// parent directories first. Any failure aborts the calling test.
func mustWriteRagFixture(t *testing.T, root string, samples []RagSample) {
	t.Helper()

	fixture := filepath.Join(root, "exports/rag/playbooks.jsonl")
	if err := os.MkdirAll(filepath.Dir(fixture), 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}

	// Encode emits each value's JSON followed by a single newline.
	var lines strings.Builder
	enc := json.NewEncoder(&lines)
	for _, sample := range samples {
		if err := enc.Encode(sample); err != nil {
			t.Fatalf("marshal sample: %v", err)
		}
	}

	if err := os.WriteFile(fixture, []byte(lines.String()), 0o644); err != nil {
		t.Fatalf("write fixture: %v", err)
	}
}