Claude (review-harness setup) f3ee4722a8 Phase A + B (MVP) — local review harness
Implements the MVP cutline from the planning artifact:
- Phase A: skeleton + CLI dispatch + provider interface + stub model doctor
- Phase B: scanner + git probe + 12 static analyzers + reporters + pipeline
- Phase B fixtures: clean-repo, insecure-repo, degraded-repo

12 static analyzers per PROMPT.md "Suggested Static Checks For MVP":
hardcoded_paths, shell_execution, raw_sql_interpolation, broad_cors,
secret_patterns, large_files, todo_comments, missing_tests,
env_file_committed, unsafe_file_io, exposed_mutation_endpoint,
hardcoded_local_ip.

Acceptance gates passing:
- B1 (intake produces accurate counts) ✓
- B2 (insecure fixture fires ≥8 distinct check_ids — actually 11/12) ✓
- B3 (clean fixture produces 0 confirmed findings — no false positives) ✓
- B4 (scrum mode produces all 6 required markdown + JSON reports) ✓
- B5 (receipts.json marks degraded phases honestly) ✓
- F  (self-review on this repo runs without crashing) ✓ — exit 66 (degraded
  because Phase C LLM review is hard-coded to skip)

Phases C (LLM review), D (validation cross-check), E (memory + diff +
rules subcommands) deferred per the cutline. The MVP delivers the
evidence-first path; LLM is purely additive.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 00:56:02 -05:00

130 lines
3.6 KiB
Go

package analyzers
import (
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"strings"
"local-review-harness/internal/config"
"local-review-harness/internal/scanner"
)
// Analyzer is the contract every static check implements. Pure
// function over the scan result; no I/O outside reading files
// (which the runner does once and passes in).
type Analyzer interface {
	// ID is the stable check identifier (e.g. "static.hardcoded_paths").
	ID() string
	// Enabled reports whether the review profile turned this check on.
	Enabled(rp config.ReviewProfile) bool
	// Inspect returns findings for one file. The runner skips this
	// for binary / non-text files based on extension heuristics.
	// content may be empty when the file exceeds the configured size
	// limit or its read failed; implementations must tolerate that
	// and may still report from file metadata alone.
	Inspect(file scanner.File, content string, rp config.ReviewProfile) []Finding
}
// All returns the 12 MVP analyzers. The slice is built in one fixed
// order so that report determinism flows from analyzer ordering.
func All() []Analyzer {
	analyzers := make([]Analyzer, 0, 12)
	analyzers = append(analyzers,
		&hardcodedPathsAnalyzer{},
		&shellExecAnalyzer{},
		&rawSQLAnalyzer{},
		&corsAnalyzer{},
		&secretPatternsAnalyzer{},
		&largeFilesAnalyzer{},
		&todoFixmeAnalyzer{},
		&missingTestsAnalyzer{},
		&envFileAnalyzer{},
		&unsafeFileIOAnalyzer{},
		&exposedMutationAnalyzer{},
		&hardcodedIPsAnalyzer{},
	)
	return analyzers
}
// Run executes every enabled analyzer over the scan result. Each
// text-like file is read at most once and its content is fanned out
// to all active analyzers. Files larger than rp.Limits.MaxFileBytes
// are inspected with empty content so metadata-only checks (e.g.
// the large-files check) still fire.
func Run(scan *scanner.Result, rp config.ReviewProfile) []Finding {
	// Filter down to the analyzers this profile turned on.
	var active []Analyzer
	for _, a := range All() {
		if a.Enabled(rp) {
			active = append(active, a)
		}
	}

	// Non-nil so an empty run serializes as [] rather than null.
	findings := []Finding{}

	// Per-file pass: one read per file, shared across analyzers.
	for _, file := range scan.Files {
		if !isTextLike(file) {
			continue
		}
		content := ""
		if file.Size <= int64(rp.Limits.MaxFileBytes) {
			// NOTE(review): a read error is deliberately swallowed —
			// the analyzers then see empty content and can still
			// report from metadata; confirm this matches the
			// receipts' "degraded honestly" contract.
			if data, err := os.ReadFile(file.Abs); err == nil {
				content = string(data)
			}
		}
		for _, a := range active {
			findings = append(findings, a.Inspect(file, content, rp)...)
		}
	}

	// Repo-level pass: checks that look at the whole scan result.
	for _, a := range active {
		if repo, ok := a.(repoLevelAnalyzer); ok {
			findings = append(findings, repo.InspectRepo(scan, rp)...)
		}
	}

	// Deterministic IDs so memory dedup works across runs.
	for i := range findings {
		findings[i].ID = stableID(findings[i])
	}
	return findings
}
// repoLevelAnalyzer is an optional extension interface for checks
// that operate on the whole scan rather than per file (e.g.
// "missing tests" — only fires once per repo, not per file). Run
// type-asserts each enabled Analyzer against it and invokes
// InspectRepo once, after the per-file pass.
type repoLevelAnalyzer interface {
	// InspectRepo returns repo-wide findings computed from the full
	// scan result; it is called at most once per run.
	InspectRepo(scan *scanner.Result, rp config.ReviewProfile) []Finding
}
// isTextLike reports whether f is worth regex-scanning, filtering
// by extension only. Conservative by design: anything not on the
// known-binary list is treated as text, and analyzers handle their
// own noise.
func isTextLike(f scanner.File) bool {
	ext := strings.ToLower(filepath.Ext(f.Path))
	binaryExts := [...]string{
		".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico",
		".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz",
		".woff", ".woff2", ".ttf", ".otf",
		".mp3", ".mp4", ".mov", ".wav",
		".so", ".dll", ".dylib", ".exe",
		".parquet", ".lance", ".arrow",
	}
	for _, bin := range binaryExts {
		if ext == bin {
			return false
		}
	}
	return true
}
// stableID derives a short deterministic identifier for a finding:
// sha256 over "check_id|file|line_hint|evidence", truncated to 12
// hex chars. The same finding across runs hashes to the same ID,
// which gives memory (Phase E) its append-only dedup signal.
func stableID(f Finding) string {
	key := f.CheckID + "|" + f.File + "|" + f.LineHint + "|" + f.Evidence
	digest := sha256.Sum256([]byte(key))
	return hex.EncodeToString(digest[:])[:12]
}