package analyzers import ( "crypto/sha256" "encoding/hex" "os" "path/filepath" "strings" "local-review-harness/internal/config" "local-review-harness/internal/scanner" ) // Analyzer is the contract every static check implements. Pure // function over the scan result; no I/O outside reading files // (which the runner does once and passes in). type Analyzer interface { // ID is the stable check identifier (e.g. "static.hardcoded_paths"). ID() string // Enabled reports whether the review profile turned this check on. Enabled(rp config.ReviewProfile) bool // Inspect returns findings for one file. The runner skips this // for binary / non-text files based on extension heuristics. Inspect(file scanner.File, content string, rp config.ReviewProfile) []Finding } // All returns the 12 MVP analyzers. Order is stable so report // determinism flows from analyzer ordering. func All() []Analyzer { return []Analyzer{ &hardcodedPathsAnalyzer{}, &shellExecAnalyzer{}, &rawSQLAnalyzer{}, &corsAnalyzer{}, &secretPatternsAnalyzer{}, &largeFilesAnalyzer{}, &todoFixmeAnalyzer{}, &missingTestsAnalyzer{}, &envFileAnalyzer{}, &unsafeFileIOAnalyzer{}, &exposedMutationAnalyzer{}, &hardcodedIPsAnalyzer{}, } } // Run executes every enabled analyzer over the scan result. Reads // each text file once + dispatches the content to all analyzers. // Files larger than rp.Limits.MaxFileBytes are skipped (analyzers // run on file metadata only — e.g. large-files check still fires). func Run(scan *scanner.Result, rp config.ReviewProfile) []Finding { all := All() enabled := make([]Analyzer, 0, len(all)) for _, a := range all { if a.Enabled(rp) { enabled = append(enabled, a) } } findings := []Finding{} // Per-file analyzers (read content once) for _, f := range scan.Files { if !isTextLike(f) { continue } var content string if f.Size <= int64(rp.Limits.MaxFileBytes) { b, err := os.ReadFile(f.Abs) if err == nil { content = string(b) } } for _, a := range enabled { fs := a.Inspect(f, content, rp) findings = append(findings, fs...) } } // Repo-level analyzers (scan-result-only checks) for _, a := range enabled { if rl, ok := a.(repoLevelAnalyzer); ok { findings = append(findings, rl.InspectRepo(scan, rp)...) } } // Stable ID assignment per finding so memory dedup works across runs. for i := range findings { findings[i].ID = stableID(findings[i]) } return findings } // repoLevelAnalyzer is for checks that operate on the whole scan // (e.g. "missing tests" — only fires once per repo, not per file). type repoLevelAnalyzer interface { InspectRepo(scan *scanner.Result, rp config.ReviewProfile) []Finding } // isTextLike filters out files where regex scanning is meaningless. // Conservative — when in doubt, scan; analyzers handle their own noise. func isTextLike(f scanner.File) bool { switch strings.ToLower(filepath.Ext(f.Path)) { case ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".ico", ".pdf", ".zip", ".tar", ".gz", ".bz2", ".xz", ".woff", ".woff2", ".ttf", ".otf", ".mp3", ".mp4", ".mov", ".wav", ".so", ".dll", ".dylib", ".exe", ".parquet", ".lance", ".arrow": return false } return true } // stableID is sha256(check_id|file|line_hint|evidence) truncated to // 12 hex chars. Same finding across runs → same ID. Used by memory // for append-only dedup signal (Phase E). func stableID(f Finding) string { h := sha256.New() h.Write([]byte(f.CheckID)) h.Write([]byte("|")) h.Write([]byte(f.File)) h.Write([]byte("|")) h.Write([]byte(f.LineHint)) h.Write([]byte("|")) h.Write([]byte(f.Evidence)) return hex.EncodeToString(h.Sum(nil))[:12] }