// Package replay ports scripts/distillation/replay.ts to Go. // // Replay takes a task → retrieves matching playbooks/RAG records → // builds a context bundle → calls a LOCAL model via the gateway's // /v1/chat → validates → escalates to a stronger model if needed → // logs the run as new evidence in `data/_kb/replay_runs.jsonl`. // // Spec invariants (carry over from replay.ts): // - never bypass retrieval (unless caller passes NoRetrieval) // - never discard provenance // - never allow free-form hallucinated output (validation gate) // - log every run as new evidence // // This is NOT training — it's runtime behavior shaping via retrieval. package replay // ReplayRequest mirrors the TS interface. NoRetrieval skips the // context bundle entirely (baseline mode for A/B tests). DryRun returns // a deterministic synthetic response without calling the gateway — // used by tests to exercise retrieval/validation without an LLM. type ReplayRequest struct { Task string LocalOnly bool AllowEscalation bool NoRetrieval bool DryRun bool GatewayURL string // overrides $LH_GATEWAY_URL LocalModel string // overrides default EscalationModel string // overrides default } // RagSample is one record in exports/rag/playbooks.jsonl. type RagSample struct { ID string `json:"id"` Title string `json:"title"` Content string `json:"content"` Tags []string `json:"tags"` SourceRunID string `json:"source_run_id"` SuccessScore string `json:"success_score"` SourceCategory string `json:"source_category"` } // RetrievedArtifact is one playbook surfaced into a ContextBundle. type RetrievedArtifact struct { RagID string `json:"rag_id"` SourceRunID string `json:"source_run_id"` Title string `json:"title"` ContentPreview string `json:"content_preview"` // first 240 chars SuccessScore string `json:"success_score"` Tags []string `json:"tags"` Score float64 `json:"score"` } // ContextBundle is what the prompt builder consumes. Empty bundles // (no retrieved playbooks) still pass through — buildPrompt downgrades // to a no-context prompt when both accepted and warnings are empty. type ContextBundle struct { RetrievedPlaybooks []RetrievedArtifact `json:"retrieved_playbooks"` PriorSuccessfulOutputs []RetrievedArtifact `json:"prior_successful_outputs"` FailurePatterns []RetrievedArtifact `json:"failure_patterns"` ValidationSteps []string `json:"validation_steps"` BundleTokenEstimate int `json:"bundle_token_estimate"` } // ValidationResult is the deterministic gate's verdict. Reasons is // always non-nil so JSON consumers can iterate without a nil check. type ValidationResult struct { Passed bool `json:"passed"` Reasons []string `json:"reasons"` } // ReplayResult is what Replay returns. Mirrors the TS type one-to-one // so JSONL emitted by either runtime parses identically. type ReplayResult struct { InputTask string `json:"input_task"` TaskHash string `json:"task_hash"` RetrievedArtifacts RetrievedIDs `json:"retrieved_artifacts"` ContextBundle *ContextBundle `json:"context_bundle"` ModelResponse string `json:"model_response"` ModelUsed string `json:"model_used"` EscalationPath []string `json:"escalation_path"` ValidationResult ValidationResult `json:"validation_result"` RecordedRunID string `json:"recorded_run_id"` RecordedAt string `json:"recorded_at"` DurationMs int64 `json:"duration_ms"` } // RetrievedIDs is the {rag_ids} envelope the TS shape uses. type RetrievedIDs struct { RagIDs []string `json:"rag_ids"` } // Defaults match replay.ts. Override via env or ReplayRequest fields. const ( DefaultLocalModel = "qwen3.5:latest" DefaultEscalationModel = "deepseek-v3.1:671b" DefaultGatewayURL = "http://localhost:3110" )