Claude (review-harness setup) e346b54e0f Phase C — local-Ollama LLM review wired end-to-end
Implements PROMPT.md / docs/REVIEW_PIPELINE.md Phase 2:
- internal/llm/ollama.go — real Ollama provider:
  - HealthCheck probes /api/tags + a 1-token completion + a JSON-mode
    probe ({"ok": true} round-trip), populating the model-doctor.json
    schema documented in docs/LOCAL_MODEL_SETUP.md
  - Complete + CompleteJSON via /api/chat with stream=false
  - think=false set for ALL completions (qwen3.5:latest is reasoning-
    capable, but the inner-loop hot path wants direct answers, not
    reasoning traces consuming the token budget; same finding as the
    Lakehouse-Go chatd 2026-04-30 wave); request shape sketched below
    the list
- internal/llm/review.go — Reviewer wrapper:
  - 2-attempt flow: prompt → parse → repair-prompt → parse
  - Strict JSON shape enforced; markdown fences stripped before parse
  - Severity normalized to enum; out-of-range confidence clamped
  - Per-file chunking (file-level for v0; function-level Phase D+)
  - Bounded by review-profile max_file_bytes + max_llm_chunk_chars
- pipeline.go — Phase 2 wired between static scan + report gen:
  - --enable-llm flag opts in (off by default — static-only is
    cheaper and faster)
  - Raw output ALWAYS saved to llm-findings.raw.json (forensics)
  - Normalized findings → llm-findings.normalized.json
  - LLM findings merged into the report findings list (sourced
    "llm" so consumers can filter)
  - Receipts honestly mark phase status: "ok" | "degraded" | "skipped"
- cli model doctor — real probes replace the Phase A stub.
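
For orientation, a minimal sketch of that chat request. The JSON keys
(model, messages, stream, think, format) follow Ollama's documented
/api/chat API; the Go types and field names here are illustrative, not
the actual ollama.go code:

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
	Stream   bool          `json:"stream"`           // false: one complete response
	Think    bool          `json:"think"`            // false: suppress reasoning traces
	Format   string        `json:"format,omitempty"` // "json" for CompleteJSON
}

type chatMessage struct {
	Role    string `json:"role"` // "user" for the single-shot review prompt
	Content string `json:"content"`
}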

Verified:
- model doctor: status="ok" with qwen3.5:latest + qwen3:latest both
  loaded, basic_prompt_ok=true, json_mode_ok=true (report shape
  sketched below the list)
- insecure-repo with --enable-llm: 9 LLM findings; qwen3.5 correctly
  flagged SQLi, RCE, hardcoded credentials as critical with verbatim
  evidence; 27s wall for 3 chunks
- clean-repo with --enable-llm: 0 LLM findings, 4 parsed chunks, 2.8s
- self-review with --enable-llm: 77 LLM findings + 83 static; 3 of
  ~30 chunks needed retry (PROMPT.md, REPORT_SCHEMA.md,
  SCRUM_TEST_TEMPLATE.md — all eventually parsed); 5min wall
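
A hedged sketch of the model-doctor.json shape those probes populate.
Only status, basic_prompt_ok, and json_mode_ok are confirmed above;
the struct name and the models field are assumptions (the schema
proper lives in docs/LOCAL_MODEL_SETUP.md):

type doctorReport struct {
	Status        string   `json:"status"`          // "ok" observed; other values assumed
	Models        []string `json:"models"`          // assumed field; e.g. qwen3.5:latest
	BasicPromptOK bool     `json:"basic_prompt_ok"` // 1-token completion probe
	JSONModeOK    bool     `json:"json_mode_ok"`    // {"ok": true} round-trip probe
}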

go vet + go test -short clean. Fixture stray.go now `package fixture`
so go-tooling doesn't choke on the orphan.

Phase D (validator cross-check) + Phase E (memory + diff/rules
subcommands) remain.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 01:13:39 -05:00

// Phase 2 (LLM review) implementation. Sends bounded chunks of the
// repo to the local model, asks for strict JSON Findings, retries
// once on parse failure, marks the phase degraded if the second
// attempt also fails. Raw output is saved either way — operators
// can re-parse manually if the harness rejected something useful.
package llm

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"local-review-harness/internal/analyzers"
	"local-review-harness/internal/scanner"
)

// ReviewInput is one bounded review request. The harness chunks the
// scan result into ReviewInputs (one per file or one per file-group)
// before calling Review.
type ReviewInput struct {
	ChunkID     string // stable per-chunk identifier (file path for v0)
	Description string // human label (e.g. "internal/foo/bar.go")
	Content     string // the actual code/content to review
	Language    string // for the prompt context
}

// ReviewOutput is what one Review call produces. RawContent is the
// model's verbatim output before parsing — saved for forensics if
// parsing fails.
type ReviewOutput struct {
	ChunkID    string              `json:"chunk_id"`
	Findings   []analyzers.Finding `json:"findings"`
	RawContent string              `json:"raw_content"`
	Parsed     bool                `json:"parsed"`
	Retried    bool                `json:"retried"`
	Error      string              `json:"error,omitempty"`
}

// Reviewer wraps a Provider with the prompt + retry logic. Stateless;
// the prompt template is baked in for v0.
type Reviewer struct {
	prov  Provider
	model string
	opts  CompleteOptions
}

// NewReviewer constructs a Reviewer pointing at the configured
// primary model. opts are passed through to every Complete call;
// callers tune via review-profile.
func NewReviewer(prov Provider, model string, opts CompleteOptions) *Reviewer {
	if opts.TimeoutSeconds == 0 {
		opts.TimeoutSeconds = 120
	}
	return &Reviewer{prov: prov, model: model, opts: opts}
}

// Review runs the 2-attempt flow: prompt → parse → retry-with-repair-prompt → parse.
func (r *Reviewer) Review(ctx context.Context, in ReviewInput) ReviewOutput {
	out := ReviewOutput{ChunkID: in.ChunkID}

	// Attempt 1
	prompt := buildReviewPrompt(in, false)
	raw, err := r.prov.CompleteJSON(ctx, r.model, prompt, r.opts)
	out.RawContent = raw
	if err != nil {
		out.Error = "request failed: " + err.Error()
		return out
	}
	if findings, perr := parseFindings(raw, in); perr == nil {
		out.Findings = findings
		out.Parsed = true
		return out
	}

	// Attempt 2 (repair prompt — feed the raw output back + ask for
	// strict JSON only). Done once; second failure is degraded.
	out.Retried = true
	repair := buildRepairPrompt(in, raw)
	raw2, err := r.prov.CompleteJSON(ctx, r.model, repair, r.opts)
	out.RawContent = raw + "\n\n---repair---\n\n" + raw2
	if err != nil {
		out.Error = "repair request failed: " + err.Error()
		return out
	}
	findings, perr := parseFindings(raw2, in)
	if perr != nil {
		out.Error = "parse failed after repair: " + perr.Error()
		return out
	}
	out.Findings = findings
	out.Parsed = true
	return out
}

// ReviewBatch runs Review over a slice of inputs sequentially. Could
// parallelize at G3+, but local Ollama is GPU-bound and serial is
// the safe v0 — burst-parallel would queue at the model server anyway.
func (r *Reviewer) ReviewBatch(ctx context.Context, inputs []ReviewInput) []ReviewOutput {
	out := make([]ReviewOutput, 0, len(inputs))
	for _, in := range inputs {
		select {
		case <-ctx.Done():
			out = append(out, ReviewOutput{
				ChunkID: in.ChunkID,
				Error:   "context cancelled before chunk processed",
			})
			continue
		default:
		}
		out = append(out, r.Review(ctx, in))
	}
	return out
}

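// Example wiring from the pipeline side (illustrative; prov, opts,
// scanRes, maxFileBytes, maxChunkChars, and read are stand-in names,
// not pipeline.go's actual identifiers):
//
//	reviewer := llm.NewReviewer(prov, "qwen3.5:latest", opts)
//	inputs := llm.ChunkInputsFromScan(scanRes, maxFileBytes, maxChunkChars, read)
//	outputs := reviewer.ReviewBatch(ctx, inputs)
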
// === prompts ===
const reviewSystemPrompt = `You are a senior code reviewer auditing a single source file.
Your job: emit a JSON object with a "findings" array. Each finding
must include:
- title (string, < 80 chars)
- severity ("low" | "medium" | "high" | "critical")
- file (string, the file path you were asked to review — verbatim)
- line_hint (string, e.g. "42" or "100-110")
- evidence (string, a SHORT direct quote from the file — must
  exist verbatim in the source so a downstream validator can
  grep it)
- reason (string, one sentence explaining why this is a finding)
- suggested_fix (string, optional, one sentence)
- confidence (number 0.0–1.0)
Severity guidance:
- critical: credential leak, RCE risk, destructive command,
  unauthenticated mutation
- high: SQL injection, broad CORS, fail-open auth, unsafe FS
- medium: hardcoded paths, weak error handling, missing tests
  near important code
- low: naming, duplication, doc drift
Hard rules (failure = your output is rejected):
1. Output ONLY the JSON object. No prose before or after.
2. The evidence field MUST be a verbatim substring of the file.
   If you can't quote the source, drop the finding.
3. Don't invent file paths, line numbers, or test names.
4. If the file is clean, return {"findings": []}.
5. Output nothing else when you're done.`
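
// A conforming reply, with illustrative values, looks like:
//
//	{"findings":[{"title":"Hardcoded credential","severity":"critical",
//	 "file":"cmd/app/main.go","line_hint":"42","evidence":"password := \"hunter2\"",
//	 "reason":"Secret committed to source.","suggested_fix":"Load from env.",
//	 "confidence":0.9}]}
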
func buildReviewPrompt(in ReviewInput, _ bool) string {
	var b strings.Builder
	b.WriteString(reviewSystemPrompt)
	b.WriteString("\n\n---\n\n")
	b.WriteString("File path: ")
	b.WriteString(in.Description)
	b.WriteString("\nLanguage: ")
	b.WriteString(in.Language)
	b.WriteString("\n\nFile content:\n```\n")
	b.WriteString(in.Content)
	b.WriteString("\n```\n\nReturn JSON only.")
	return b.String()
}

func buildRepairPrompt(in ReviewInput, prev string) string {
	var b strings.Builder
	b.WriteString("Your previous output was not valid JSON or did not match the required schema.\n\n")
	b.WriteString("Required shape:\n")
	b.WriteString(`{"findings":[{"title":"...","severity":"...","file":"...","line_hint":"...","evidence":"...","reason":"...","confidence":0.0}]}`)
	b.WriteString("\n\nPrevious raw output (for your reference):\n")
	b.WriteString(abbrev(prev, 1500)) // abbrev (defined elsewhere in this package) caps the replayed output
	b.WriteString("\n\nFor reference, the file you were reviewing was:\n")
	b.WriteString(in.Description)
	b.WriteString("\n\nReturn ONLY the JSON object now. No explanation, no markdown fences, no apology. JSON only.")
	return b.String()
}

// === parsing ===
func parseFindings(raw string, in ReviewInput) ([]analyzers.Finding, error) {
	// Strip leading/trailing whitespace + common markdown fences.
	cleaned := strings.TrimSpace(raw)
	cleaned = strings.TrimPrefix(cleaned, "```json")
	cleaned = strings.TrimPrefix(cleaned, "```")
	cleaned = strings.TrimSuffix(cleaned, "```")
	cleaned = strings.TrimSpace(cleaned)
	if cleaned == "" {
		return nil, fmt.Errorf("empty content")
	}

	var shell struct {
		Findings []struct {
			Title        string  `json:"title"`
			Severity     string  `json:"severity"`
			File         string  `json:"file"`
			LineHint     string  `json:"line_hint"`
			Evidence     string  `json:"evidence"`
			Reason       string  `json:"reason"`
			SuggestedFix string  `json:"suggested_fix"`
			Confidence   float64 `json:"confidence"`
		} `json:"findings"`
	}
	if err := json.Unmarshal([]byte(cleaned), &shell); err != nil {
		return nil, fmt.Errorf("unmarshal: %w", err)
	}

	out := make([]analyzers.Finding, 0, len(shell.Findings))
	for _, f := range shell.Findings {
		sev := normalizeSeverity(f.Severity)
		if sev == "" {
			continue // model emitted a value we don't accept
		}
		// Use the chunk's file path if model omitted/lied
		filePath := f.File
		if filePath == "" {
			filePath = in.Description
		}
		out = append(out, analyzers.Finding{
			Title:        truncate(f.Title, 80),
			Severity:     sev,
			Status:       analyzers.StatusSuspected, // validator (Phase D) promotes to confirmed
			File:         filePath,
			LineHint:     f.LineHint,
			Evidence:     f.Evidence,
			Reason:       f.Reason,
			SuggestedFix: f.SuggestedFix,
			Source:       analyzers.SourceLLM,
			Confidence:   clampFloat(f.Confidence, 0, 1),
			CheckID:      "llm.review",
		})
	}
	return out, nil
}

func normalizeSeverity(s string) analyzers.Severity {
	switch strings.ToLower(strings.TrimSpace(s)) {
	case "low":
		return analyzers.SeverityLow
	case "medium", "med":
		return analyzers.SeverityMedium
	case "high":
		return analyzers.SeverityHigh
	case "critical", "crit":
		return analyzers.SeverityCritical
	}
	return ""
}

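// NOTE: truncate is byte-based; a multi-byte UTF-8 rune at the cut
// point can be split. Titles are capped at 80 bytes above, so the
// worst case is a mangled trailing character, not a parse failure.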
func truncate(s string, n int) string {
	if len(s) <= n {
		return s
	}
	return s[:n]
}

func clampFloat(v, lo, hi float64) float64 {
	if v < lo {
		return lo
	}
	if v > hi {
		return hi
	}
	return v
}

// === chunking ===
// ChunkInputsFromScan produces one ReviewInput per file under the
// configured size limit. Files larger than maxBytes are skipped (the
// LLM phase notes them in the receipt as "skipped: too large"). v0
// is per-file; per-function chunking lands in Phase D+.
func ChunkInputsFromScan(scan *scanner.Result, maxBytes int, maxChunkChars int, readFile func(abs string) string) []ReviewInput {
	out := []ReviewInput{}
	for _, f := range scan.Files {
		if f.Language == "" {
			continue // non-code files: skip LLM review (analyzers may still flag)
		}
		if f.Size > int64(maxBytes) {
			continue
		}
		content := readFile(f.Abs)
		if len(content) > maxChunkChars {
			content = content[:maxChunkChars] + "\n... (truncated for LLM context)\n"
		}
		out = append(out, ReviewInput{
			ChunkID:     f.Path,
			Description: f.Path,
			Content:     content,
			Language:    f.Language,
		})
	}
	return out
}