// Phase 2 (LLM review) implementation. Sends bounded chunks of the
// repo to the local model, asks for strict JSON Findings, retries
// once on parse failure, marks the phase degraded if the second
// attempt also fails. Raw output is saved either way — operators
// can re-parse manually if the harness rejected something useful.
package llm

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"local-review-harness/internal/analyzers"
	"local-review-harness/internal/scanner"
)

// ReviewInput is one bounded review request. The harness chunks the
// scan result into ReviewInputs (one per file or one per file-group)
// before calling Review.
type ReviewInput struct {
	ChunkID     string // stable per-chunk identifier (file path for v0)
	Description string // human label (e.g. "internal/foo/bar.go")
	Content     string // the actual code/content to review
	Language    string // for the prompt context
}

// ReviewOutput is what one Review call produces. RawContent is the
// model's verbatim output before parsing — saved for forensics if
// parsing fails.
type ReviewOutput struct {
	ChunkID    string              `json:"chunk_id"`
	Findings   []analyzers.Finding `json:"findings"`
	RawContent string              `json:"raw_content"`
	Parsed     bool                `json:"parsed"`   // true once a findings array was successfully decoded
	Retried    bool                `json:"retried"`  // true when the repair attempt was issued
	Error      string              `json:"error,omitempty"`
}

// Reviewer wraps a Provider with the prompt + retry logic. Stateless;
// the prompt template is baked in for v0.
type Reviewer struct {
	prov  Provider
	model string
	opts  CompleteOptions
}

// NewReviewer constructs a Reviewer pointing at the configured
// primary model. opts are passed through to every Complete call;
// callers tune via review-profile.
func NewReviewer(prov Provider, model string, opts CompleteOptions) *Reviewer {
	// Default to a 2-minute per-request timeout when the caller left it unset.
	if opts.TimeoutSeconds == 0 {
		opts.TimeoutSeconds = 120
	}
	return &Reviewer{prov: prov, model: model, opts: opts}
}

// Review runs the 2-attempt flow: prompt → parse → retry-with-repair-prompt → parse.
func (r *Reviewer) Review(ctx context.Context, in ReviewInput) ReviewOutput { out := ReviewOutput{ChunkID: in.ChunkID} // Attempt 1 prompt := buildReviewPrompt(in, false) raw, err := r.prov.CompleteJSON(ctx, r.model, prompt, r.opts) out.RawContent = raw if err != nil { out.Error = "request failed: " + err.Error() return out } if findings, perr := parseFindings(raw, in); perr == nil { out.Findings = findings out.Parsed = true return out } // Attempt 2 (repair prompt — feed the raw output back + ask for // strict JSON only). Done once; second failure is degraded. out.Retried = true repair := buildRepairPrompt(in, raw) raw2, err := r.prov.CompleteJSON(ctx, r.model, repair, r.opts) out.RawContent = raw + "\n\n---repair---\n\n" + raw2 if err != nil { out.Error = "repair request failed: " + err.Error() return out } if findings, perr := parseFindings(raw2, in); perr == nil { out.Findings = findings out.Parsed = true return out } else { out.Error = "parse failed after repair: " + perr.Error() } return out } // ReviewBatch runs Review over a slice of inputs sequentially. Could // parallelize at G3+, but local Ollama is GPU-bound and serial is // the safe v0 — burst-parallel would queue at the model server anyway. func (r *Reviewer) ReviewBatch(ctx context.Context, inputs []ReviewInput) []ReviewOutput { out := make([]ReviewOutput, 0, len(inputs)) for _, in := range inputs { select { case <-ctx.Done(): out = append(out, ReviewOutput{ ChunkID: in.ChunkID, Error: "context cancelled before chunk processed", }) continue default: } out = append(out, r.Review(ctx, in)) } return out } // === prompts === const reviewSystemPrompt = `You are a senior code reviewer auditing a single source file. Your job: emit a JSON object with a "findings" array. Each finding must include: - title (string, < 80 chars) - severity ("low" | "medium" | "high" | "critical") - file (string, the file path you were asked to review — verbatim) - line_hint (string, e.g. 
"42" or "100-110") - evidence (string, a SHORT direct quote from the file — must exist verbatim in the source so a downstream validator can grep it) - reason (string, one sentence explaining why this is a finding) - suggested_fix (string, optional, one sentence) - confidence (number 0.0–1.0) Severity guidance: - critical: credential leak, RCE risk, destructive command, unauthenticated mutation - high: SQL injection, broad CORS, fail-open auth, unsafe FS - medium: hardcoded paths, weak error handling, missing tests near important code - low: naming, duplication, doc drift Hard rules (failure = your output is rejected): 1. Output ONLY the JSON object. No prose before or after. 2. The evidence field MUST be a verbatim substring of the file. If you can't quote the source, drop the finding. 3. Don't invent file paths, line numbers, or test names. 4. If the file is clean, return {"findings": []}. 5. Output nothing else when you're done.` func buildReviewPrompt(in ReviewInput, _ bool) string { var b strings.Builder b.WriteString(reviewSystemPrompt) b.WriteString("\n\n---\n\n") b.WriteString("File path: ") b.WriteString(in.Description) b.WriteString("\nLanguage: ") b.WriteString(in.Language) b.WriteString("\n\nFile content:\n```\n") b.WriteString(in.Content) b.WriteString("\n```\n\nReturn JSON only.") return b.String() } func buildRepairPrompt(in ReviewInput, prev string) string { var b strings.Builder b.WriteString("Your previous output was not valid JSON or did not match the required schema.\n\n") b.WriteString("Required shape:\n") b.WriteString(`{"findings":[{"title":"...","severity":"...","file":"...","line_hint":"...","evidence":"...","reason":"...","confidence":0.0}]}`) b.WriteString("\n\nPrevious raw output (for your reference):\n") b.WriteString(abbrev(prev, 1500)) b.WriteString("\n\nFor reference, the file you were reviewing was:\n") b.WriteString(in.Description) b.WriteString("\n\nReturn ONLY the JSON object now. No explanation, no markdown fences, no apology. 
JSON only.") return b.String() } // === parsing === func parseFindings(raw string, in ReviewInput) ([]analyzers.Finding, error) { // Strip leading/trailing whitespace + common markdown fences. cleaned := strings.TrimSpace(raw) cleaned = strings.TrimPrefix(cleaned, "```json") cleaned = strings.TrimPrefix(cleaned, "```") cleaned = strings.TrimSuffix(cleaned, "```") cleaned = strings.TrimSpace(cleaned) if cleaned == "" { return nil, fmt.Errorf("empty content") } var shell struct { Findings []struct { Title string `json:"title"` Severity string `json:"severity"` File string `json:"file"` LineHint string `json:"line_hint"` Evidence string `json:"evidence"` Reason string `json:"reason"` SuggestedFix string `json:"suggested_fix"` Confidence float64 `json:"confidence"` } `json:"findings"` } if err := json.Unmarshal([]byte(cleaned), &shell); err != nil { return nil, fmt.Errorf("unmarshal: %w", err) } out := make([]analyzers.Finding, 0, len(shell.Findings)) for _, f := range shell.Findings { sev := normalizeSeverity(f.Severity) if sev == "" { continue // model emitted a value we don't accept } // Use the chunk's file path if model omitted/lied filePath := f.File if filePath == "" { filePath = in.Description } out = append(out, analyzers.Finding{ Title: truncate(f.Title, 80), Severity: sev, Status: analyzers.StatusSuspected, // validator (Phase D) promotes to confirmed File: filePath, LineHint: f.LineHint, Evidence: f.Evidence, Reason: f.Reason, SuggestedFix: f.SuggestedFix, Source: analyzers.SourceLLM, Confidence: clampFloat(f.Confidence, 0, 1), CheckID: "llm.review", }) } return out, nil } func normalizeSeverity(s string) analyzers.Severity { switch strings.ToLower(strings.TrimSpace(s)) { case "low": return analyzers.SeverityLow case "medium", "med": return analyzers.SeverityMedium case "high": return analyzers.SeverityHigh case "critical", "crit": return analyzers.SeverityCritical } return "" } func truncate(s string, n int) string { if len(s) <= n { return s } return 
s[:n] } func clampFloat(v, lo, hi float64) float64 { if v < lo { return lo } if v > hi { return hi } return v } // === chunking === // ChunkInputsFromScan produces one ReviewInput per file under the // configured size limit. Files larger than maxBytes are skipped (the // LLM phase notes them in the receipt as "skipped: too large"). v0 // is per-file; per-function chunking lands in Phase D+. func ChunkInputsFromScan(scan *scanner.Result, maxBytes int, maxChunkChars int, readFile func(abs string) string) []ReviewInput { out := []ReviewInput{} for _, f := range scan.Files { if f.Language == "" { continue // non-code files: skip LLM review (analyzers may still flag) } if f.Size > int64(maxBytes) { continue } content := readFile(f.Abs) if len(content) > maxChunkChars { content = content[:maxChunkChars] + "\n... (truncated for LLM context)\n" } out = append(out, ReviewInput{ ChunkID: f.Path, Description: f.Path, Content: content, Language: f.Language, }) } return out } // Useful for callers wiring a deadline across the whole batch. var _ = time.Now