Implements the MVP cutline from the planning artifact: - Phase A: skeleton + CLI dispatch + provider interface + stub model doctor - Phase B: scanner + git probe + 12 static analyzers + reporters + pipeline - Phase B fixtures: clean-repo, insecure-repo, degraded-repo 12 static analyzers per PROMPT.md "Suggested Static Checks For MVP": hardcoded_paths, shell_execution, raw_sql_interpolation, broad_cors, secret_patterns, large_files, todo_comments, missing_tests, env_file_committed, unsafe_file_io, exposed_mutation_endpoint, hardcoded_local_ip. Acceptance gates passing: - B1 (intake produces accurate counts) ✓ - B2 (insecure fixture fires ≥8 distinct check_ids — actually 11/12) ✓ - B3 (clean fixture produces 0 confirmed findings — no false positives) ✓ - B4 (scrum mode produces all 6 required markdown + JSON reports) ✓ - B5 (receipts.json marks degraded phases honestly) ✓ - F (self-review on this repo runs without crashing) ✓ — exit 66 (degraded because Phase C LLM review is hardcoded skipped) Phases C (LLM review), D (validation cross-check), E (memory + diff + rules subcommands) deferred per the cutline. The MVP delivers the evidence-first path; LLM is purely additive. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
130 lines
3.6 KiB
Go
130 lines
3.6 KiB
Go
package analyzers
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"local-review-harness/internal/config"
|
|
"local-review-harness/internal/scanner"
|
|
)
|
|
|
|
// Analyzer is the contract every static check implements. Pure
// function over the scan result; no I/O outside reading files
// (which the runner does once and passes in).
type Analyzer interface {
	// ID is the stable check identifier (e.g. "static.hardcoded_paths").
	ID() string

	// Enabled reports whether the review profile turned this check on.
	Enabled(rp config.ReviewProfile) bool

	// Inspect returns findings for one file. The runner skips this
	// for binary / non-text files based on extension heuristics.
	// content may be the empty string when the file exceeded
	// rp.Limits.MaxFileBytes or could not be read — implementations
	// must tolerate that and fall back to metadata-only checks.
	Inspect(file scanner.File, content string, rp config.ReviewProfile) []Finding
}
|
|
|
|
// All returns the 12 MVP analyzers. Order is stable so report
|
|
// determinism flows from analyzer ordering.
|
|
func All() []Analyzer {
|
|
return []Analyzer{
|
|
&hardcodedPathsAnalyzer{},
|
|
&shellExecAnalyzer{},
|
|
&rawSQLAnalyzer{},
|
|
&corsAnalyzer{},
|
|
&secretPatternsAnalyzer{},
|
|
&largeFilesAnalyzer{},
|
|
&todoFixmeAnalyzer{},
|
|
&missingTestsAnalyzer{},
|
|
&envFileAnalyzer{},
|
|
&unsafeFileIOAnalyzer{},
|
|
&exposedMutationAnalyzer{},
|
|
&hardcodedIPsAnalyzer{},
|
|
}
|
|
}
|
|
|
|
// Run executes every enabled analyzer over the scan result. Reads
|
|
// each text file once + dispatches the content to all analyzers.
|
|
// Files larger than rp.Limits.MaxFileBytes are skipped (analyzers
|
|
// run on file metadata only — e.g. large-files check still fires).
|
|
func Run(scan *scanner.Result, rp config.ReviewProfile) []Finding {
|
|
all := All()
|
|
enabled := make([]Analyzer, 0, len(all))
|
|
for _, a := range all {
|
|
if a.Enabled(rp) {
|
|
enabled = append(enabled, a)
|
|
}
|
|
}
|
|
|
|
findings := []Finding{}
|
|
|
|
// Per-file analyzers (read content once)
|
|
for _, f := range scan.Files {
|
|
if !isTextLike(f) {
|
|
continue
|
|
}
|
|
var content string
|
|
if f.Size <= int64(rp.Limits.MaxFileBytes) {
|
|
b, err := os.ReadFile(f.Abs)
|
|
if err == nil {
|
|
content = string(b)
|
|
}
|
|
}
|
|
for _, a := range enabled {
|
|
fs := a.Inspect(f, content, rp)
|
|
findings = append(findings, fs...)
|
|
}
|
|
}
|
|
|
|
// Repo-level analyzers (scan-result-only checks)
|
|
for _, a := range enabled {
|
|
if rl, ok := a.(repoLevelAnalyzer); ok {
|
|
findings = append(findings, rl.InspectRepo(scan, rp)...)
|
|
}
|
|
}
|
|
|
|
// Stable ID assignment per finding so memory dedup works across runs.
|
|
for i := range findings {
|
|
findings[i].ID = stableID(findings[i])
|
|
}
|
|
return findings
|
|
}
|
|
|
|
// repoLevelAnalyzer is for checks that operate on the whole scan
// (e.g. "missing tests" — only fires once per repo, not per file).
// Implemented optionally, alongside Analyzer; Run detects it via a
// type assertion and invokes it once after the per-file pass.
type repoLevelAnalyzer interface {
	// InspectRepo returns findings derived from the scan result as a
	// whole rather than from any single file's content.
	InspectRepo(scan *scanner.Result, rp config.ReviewProfile) []Finding
}
|
|
|
|
// isTextLike filters out files where regex scanning is meaningless.
|
|
// Conservative — when in doubt, scan; analyzers handle their own noise.
|
|
func isTextLike(f scanner.File) bool {
|
|
switch strings.ToLower(filepath.Ext(f.Path)) {
|
|
case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico",
|
|
".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz",
|
|
".woff", ".woff2", ".ttf", ".otf",
|
|
".mp3", ".mp4", ".mov", ".wav",
|
|
".so", ".dll", ".dylib", ".exe",
|
|
".parquet", ".lance", ".arrow":
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// stableID is sha256(check_id|file|line_hint|evidence) truncated to
|
|
// 12 hex chars. Same finding across runs → same ID. Used by memory
|
|
// for append-only dedup signal (Phase E).
|
|
func stableID(f Finding) string {
|
|
h := sha256.New()
|
|
h.Write([]byte(f.CheckID))
|
|
h.Write([]byte("|"))
|
|
h.Write([]byte(f.File))
|
|
h.Write([]byte("|"))
|
|
h.Write([]byte(f.LineHint))
|
|
h.Write([]byte("|"))
|
|
h.Write([]byte(f.Evidence))
|
|
return hex.EncodeToString(h.Sum(nil))[:12]
|
|
}
|