// golangLAKEHOUSE/internal/replay/replay_test.go

package replay

import (
	"context"
	"encoding/json"
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// ─── Tokenization + retrieval primitives ───────────────────────────

// TestTokenize_FiltersShortAndLowercase checks that tokenize emits the
// lowercase form of every multi-character word in the input, omits the
// single-character token "a", and emits no token containing uppercase
// letters.
func TestTokenize_FiltersShortAndLowercase(t *testing.T) {
	got := tokenize("Hello, World! Foo BAR baz x12 a")
	for _, tok := range []string{"hello", "world", "foo", "bar", "baz", "x12"} {
		if _, ok := got[tok]; !ok {
			t.Errorf("missing token %q", tok)
		}
	}
	if _, ok := got["a"]; ok {
		t.Errorf("len=1 token should be filtered: a")
	}
	// The presence checks above cannot catch an implementation that keeps
	// the original-case spelling next to the lowercase one, so also assert
	// that nothing emitted still carries uppercase letters.
	for tok := range got {
		if tok != strings.ToLower(tok) {
			t.Errorf("token %q should be lowercased", tok)
		}
	}
}

// TestJaccard_EdgeCases pins jaccard on two partially overlapping sets and
// on an empty left-hand set.
func TestJaccard_EdgeCases(t *testing.T) {
	left := map[string]struct{}{"x": {}, "y": {}, "z": {}}
	right := map[string]struct{}{"y": {}, "z": {}, "w": {}}

	// The intersection {y, z} has 2 members; the union {x, y, z, w} has 4.
	expected := 2.0 / 4.0
	if score := jaccard(left, right); score != expected {
		t.Errorf("jaccard = %v, want %v", score, expected)
	}

	empty := map[string]struct{}{}
	if jaccard(empty, right) != 0 {
		t.Error("empty set should produce 0")
	}
}

// ─── Retrieval ───────────────────────────────────────────────────

// TestRetrieveRag_ScoresAndCaps runs retrieveRag over a three-sample corpus
// and requires a non-empty result that does not contain the unrelated
// cooking sample "p2".
func TestRetrieveRag_ScoresAndCaps(t *testing.T) {
	samples := []RagSample{
		{ID: "p1", Title: "validate scrum", Content: "verify the build, check tests", Tags: []string{"scrum"}, SuccessScore: "accepted"},
		{ID: "p2", Title: "irrelevant cooking notes", Content: "boil pasta longer than ten minutes", Tags: []string{"food"}, SuccessScore: "accepted"},
		{ID: "p3", Title: "build verification ladder", Content: "verify build steps, assert green", Tags: []string{"build"}, SuccessScore: "partially_accepted"},
	}

	hits := retrieveRag(samples, "verify the build assert green", 3)
	if len(hits) == 0 {
		t.Fatal("expected at least one result")
	}

	for _, hit := range hits {
		if hit.RagID != "p2" {
			continue
		}
		t.Errorf("irrelevant sample p2 should not surface, got: %+v", hits)
	}
}

// TestBuildContextBundle_SplitsAcceptedAndPartial feeds one "accepted" and
// one "partially_accepted" sample to BuildContextBundle and expects them to
// land in PriorSuccessfulOutputs and FailurePatterns respectively, with at
// least one validation step produced.
func TestBuildContextBundle_SplitsAcceptedAndPartial(t *testing.T) {
	samples := []RagSample{
		{ID: "a1", Title: "A1", Content: "verify build assert green check tests", SuccessScore: "accepted"},
		{ID: "p1", Title: "P1", Content: "verify build sometimes fails to assert", SuccessScore: "partially_accepted"},
	}

	bundle := BuildContextBundle(samples, "verify build assert tests")
	if bundle == nil {
		t.Fatal("nil bundle")
	}

	accepted := bundle.PriorSuccessfulOutputs
	if !(len(accepted) == 1 && accepted[0].RagID == "a1") {
		t.Errorf("accepted bucket wrong: %+v", accepted)
	}

	partial := bundle.FailurePatterns
	if !(len(partial) == 1 && partial[0].RagID == "p1") {
		t.Errorf("partially_accepted bucket wrong: %+v", partial)
	}

	if steps := bundle.ValidationSteps; len(steps) < 1 {
		t.Errorf("expected validation_steps from accepted sample, got none")
	}
}

// ─── Prompt assembly ─────────────────────────────────────────────

// TestBuildPrompt_NoBundleIsCompact builds a prompt with a nil bundle and
// expects the user text to carry the task line but no retrieval header.
func TestBuildPrompt_NoBundleIsCompact(t *testing.T) {
	prompt := BuildPrompt("rebuild evidence index", nil)
	user := prompt.User

	hasTask := strings.Contains(user, "Task: rebuild evidence index")
	if !hasTask {
		t.Errorf("user prompt missing task: %q", user)
	}

	hasHeader := strings.Contains(user, "## Prior successful runs")
	if hasHeader {
		t.Error("no-bundle prompt should not include retrieval headers")
	}
}

func TestBuildPrompt_WithBundleIncludesAllSections(t *testing.T) {
	bundle := &ContextBundle{
		PriorSuccessfulOutputs: []RetrievedArtifact{{RagID: "a1", Title: "A1", ContentPreview: "verified", SuccessScore: "accepted"}},
		FailurePatterns:        []RetrievedArtifact{{RagID: "p1", Title: "P1", ContentPreview: "partial result", SuccessScore: "partially_accepted"}},
		ValidationSteps:        []string{"verify the build"},
	}
	p := BuildPrompt("task X", bundle)
	for _, marker := range []string{
		"## Prior successful runs",
		"## Patterns that produced PARTIAL results",
		"## Validation checklist",
		"## Task",
		"task X",
	} {
		if !strings.Contains(p.User, marker) {
			t.Errorf("user prompt missing marker %q in:\n%s", marker, p.User)
		}
	}
}

// ─── Validation gate ─────────────────────────────────────────────

// TestValidateResponse_FailsOnEmptyAndShort expects ValidateResponse to
// reject both an empty response and a very short one when no bundle is
// supplied.
func TestValidateResponse_FailsOnEmptyAndShort(t *testing.T) {
	cases := []struct {
		response string
		failMsg  string
	}{
		{response: "", failMsg: "empty should fail"},
		{response: "too short", failMsg: "too-short should fail"},
	}
	for _, tc := range cases {
		if ValidateResponse(tc.response, nil).Passed {
			t.Error(tc.failMsg)
		}
	}
}

// TestValidateResponse_FailsOnFiller expects a sufficiently long response
// that ends with a hedge phrase to be rejected.
func TestValidateResponse_FailsOnFiller(t *testing.T) {
	// Two repetitions of the sentence, followed by the hedge phrase.
	padding := strings.Repeat("This is a real long response that meets the eighty character minimum for the gate. ", 2)
	response := padding + " As an AI, I cannot help."

	verdict := ValidateResponse(response, nil)
	if !verdict.Passed {
		return
	}
	t.Errorf("response with hedge phrase should fail, reasons=%v", verdict.Reasons)
}

func TestValidateResponse_PassesWhenChecklistOverlaps(t *testing.T) {
	bundle := &ContextBundle{ValidationSteps: []string{"verify the build is green"}}
	resp := "I followed the procedure and verified that the build is green and tests passed before merging the change."
	got := ValidateResponse(resp, bundle)
	if !got.Passed {
		t.Errorf("expected pass, got reasons=%v", got.Reasons)
	}
}

func TestValidateResponse_FailsWhenChecklistOrthogonal(t *testing.T) {
	bundle := &ContextBundle{ValidationSteps: []string{"verify mango ripeness"}}
	resp := "I followed completely unrelated steps about Quantum Tax compliance — I did not look at any fruit at all and that's the point."
	got := ValidateResponse(resp, bundle)
	if got.Passed {
		t.Errorf("expected fail because no checklist token overlap, got pass")
	}
}

// ─── End-to-end (dry-run, no LLM) ────────────────────────────────

func TestReplay_DryRun_LogsResult(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{ID: "p1", Title: "build verification", Content: "verify the build, check tests pass before merge",
			Tags: []string{"scrum"}, SuccessScore: "accepted", SourceRunID: "r-1"},
	})

	res, err := Replay(context.Background(), ReplayRequest{
		Task:   "verify the build before merging",
		DryRun: true,
	}, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if res.RecordedRunID == "" {
		t.Error("expected recorded_run_id")
	}
	if !strings.HasPrefix(res.RecordedRunID, "replay:") {
		t.Errorf("run_id shape: %s", res.RecordedRunID)
	}
	if res.ContextBundle == nil {
		t.Fatal("expected retrieval to fire by default")
	}
	if len(res.ContextBundle.RetrievedPlaybooks) == 0 {
		t.Errorf("expected at least one retrieved playbook")
	}

	logPath := filepath.Join(root, "data/_kb/replay_runs.jsonl")
	body, err := os.ReadFile(logPath)
	if err != nil {
		t.Fatalf("read log: %v", err)
	}
	var row map[string]any
	if err := json.Unmarshal([]byte(strings.TrimSpace(string(body))), &row); err != nil {
		t.Fatalf("parse log row: %v", err)
	}
	if row["schema"] != "replay_run.v1" {
		t.Errorf("schema field: %v", row["schema"])
	}
}

func TestReplay_NoRetrievalSkipsCorpus(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{ID: "p1", Title: "would match", Content: "verify build assert", SuccessScore: "accepted"},
	})

	res, err := Replay(context.Background(), ReplayRequest{
		Task:        "verify build assert",
		DryRun:      true,
		NoRetrieval: true,
	}, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if res.ContextBundle != nil {
		t.Errorf("expected nil bundle in NoRetrieval mode")
	}
	if len(res.RetrievedArtifacts.RagIDs) != 0 {
		t.Errorf("expected empty rag_ids, got %v", res.RetrievedArtifacts.RagIDs)
	}
}

func TestReplay_EscalationFiresOnFailedValidation(t *testing.T) {
	root := t.TempDir()
	// Trick: the dry-run synthesizer copies validation_steps verbatim
	// into its output. If a checklist step contains a hedge phrase, the
	// synthesized response will contain it too — triggering the
	// filler-pattern guard in ValidateResponse and forcing escalation.
	mustWriteRagFixture(t, root, []RagSample{
		{ID: "p1", Title: "demo step", Content: "verify the build then i cannot proceed without approval", SuccessScore: "accepted"},
	})

	res, err := Replay(context.Background(), ReplayRequest{
		Task:            "verify the build then proceed",
		DryRun:          true,
		AllowEscalation: true,
	}, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if len(res.EscalationPath) < 2 {
		t.Errorf("expected escalation, path=%v reasons=%v", res.EscalationPath, res.ValidationResult.Reasons)
	}
	if !strings.Contains(res.ModelResponse, "[ESCALATED]") {
		t.Errorf("expected escalated marker in response, got: %q", res.ModelResponse)
	}
}

func TestReplay_NoEscalationWhenValidationPasses(t *testing.T) {
	root := t.TempDir()
	mustWriteRagFixture(t, root, []RagSample{
		{ID: "p1", Title: "build verification", Content: "verify the build, check tests pass before merge",
			Tags: []string{"scrum"}, SuccessScore: "accepted", SourceRunID: "r-1"},
	})

	res, err := Replay(context.Background(), ReplayRequest{
		Task:            "verify the build before merging",
		DryRun:          true,
		AllowEscalation: true,
	}, root)
	if err != nil {
		t.Fatalf("Replay: %v", err)
	}
	if len(res.EscalationPath) != 1 {
		t.Errorf("expected single-step path on validation pass, got %v", res.EscalationPath)
	}
	if !res.ValidationResult.Passed {
		t.Errorf("expected pass, got reasons=%v", res.ValidationResult.Reasons)
	}
}

// ─── Helpers ────────────────────────────────────────────────────

// mustWriteRagFixture serializes samples as JSON Lines (one JSON document
// per line, each terminated by '\n') into exports/rag/playbooks.jsonl under
// root, creating the parent directories first. Any mkdir, marshal, or write
// error fails the calling test immediately via t.Fatalf.
func mustWriteRagFixture(t *testing.T, root string, samples []RagSample) {
	t.Helper()

	target := filepath.Join(root, "exports", "rag", "playbooks.jsonl")
	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
		t.Fatalf("mkdir: %v", err)
	}

	// Accumulate every encoded row before touching the file so that a
	// marshal failure leaves no partial fixture behind.
	var payload []byte
	for _, sample := range samples {
		line, err := json.Marshal(sample)
		if err != nil {
			t.Fatalf("marshal sample: %v", err)
		}
		payload = append(payload, line...)
		payload = append(payload, '\n')
	}

	if err := os.WriteFile(target, payload, 0o644); err != nil {
		t.Fatalf("write fixture: %v", err)
	}
}