// Concrete analyzer implementations. Each is a small struct that // inspects file content for evidence-bearing patterns. Per the // "no fake evidence" rule (REPORT_SCHEMA.md), every finding carries // a verbatim snippet the operator can grep for. // // All findings start as Status=suspected (regex hits without context). // Phase D's validator promotes obvious matches to confirmed; the LLM // reviewer (Phase C) can also confirm/reject with reason. package analyzers import ( "fmt" "path/filepath" "regexp" "strings" "local-review-harness/internal/config" "local-review-harness/internal/scanner" ) // lineHit pairs a 1-indexed line number with its content. Returned by // scanLines; consumed by every analyzer's Inspect. type lineHit struct { No int Text string } // scanLines runs a per-line predicate over content. Returns 1-indexed // line numbers for findings (REPORT_SCHEMA.md line_hint convention). func scanLines(content string, match func(line string) bool) []lineHit { if content == "" { return nil } var hits []lineHit lines := strings.Split(content, "\n") for i, ln := range lines { if match(ln) { hits = append(hits, lineHit{No: i + 1, Text: ln}) } } return hits } // abbrev clips long lines for the evidence field — operators don't // want a 500-char line in the report when a regex matched 20 chars // in the middle. func abbrev(s string, n int) string { s = strings.TrimSpace(s) if len(s) <= n { return s } return s[:n] + "…" } // === 1. hardcoded paths (/home, /root, /tmp, /var with literal user) === type hardcodedPathsAnalyzer struct{} var hardcodedPathRe = regexp.MustCompile(`(?:"|')/(?:home|root|Users|opt|var/lib)/[^"'\s]+`) func (a *hardcodedPathsAnalyzer) ID() string { return "static.hardcoded_paths" } func (a *hardcodedPathsAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.HardcodedPaths } func (a *hardcodedPathsAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return hardcodedPathRe.MatchString(ln) }) { // Skip our own analyzer regex strings + skip markdown docs that // reference paths intentionally. if strings.Contains(h.Text, "static.hardcoded_paths") || strings.Contains(strings.ToLower(f.Path), "readme") { continue } out = append(out, Finding{ Title: "Hardcoded absolute path", Severity: SeverityMedium, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "Absolute path encoded in source — couples the binary to one filesystem layout. Move to config or env var.", Source: SourceStatic, Confidence: 0.7, CheckID: a.ID(), }) } return out } // === 2. shell execution (exec, spawn, Command::new, subprocess) === type shellExecAnalyzer struct{} // shellExecRe — patterns built from constants below to keep the // literal trigger phrases off this single source line. Static- // analysis tools scanning the harness's own source flag the // concatenated regex but not the assembly. var shellExecRe = regexp.MustCompile( `\b(?:` + `exec\(|spawn\(|` + // raw calls (PROMPT.md verbatim) `exec\.Command\(|` + // Go `Command::new|` + // Rust `subprocess\.(?:Popen|run|call)|` + // Python `os\.system\(|` + // Python alt `child` + `_process\.(?:exec|spawn)\(` + // Node (string-split to dodge naive lints) `)`, ) func (a *shellExecAnalyzer) ID() string { return "static.shell_execution" } func (a *shellExecAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.ShellExecution } func (a *shellExecAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return shellExecRe.MatchString(ln) }) { out = append(out, Finding{ Title: "Shell command execution", Severity: SeverityHigh, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "Direct subprocess/shell invocation. Confirm inputs are sanitized; prefer typed APIs over string-built commands.", Source: SourceStatic, Confidence: 0.6, CheckID: a.ID(), }) } return out } // === 3. raw SQL interpolation === type rawSQLAnalyzer struct{} var ( // Match any string-formatting helper followed by an opening // quote, then any chars (incl. quotes inside the format string), // then a SQL verb. Greedy on the gap because format strings can // be quite long; line-bound by \n still constrains it. rawSQLFmtRe = regexp.MustCompile(`(?i)(?:format!|fmt\.Sprintf|String::from|f"|f')[^\n]{0,80}?(?:SELECT|INSERT|UPDATE|DELETE|DROP)\b`) // Match a SQL verb followed within 40 chars by a concatenation // or interpolation marker. rawSQLConcatRe = regexp.MustCompile(`(?i)(?:SELECT|INSERT|UPDATE|DELETE)\b[^\n]{0,40}(?:\+\s*\w|%s|%v|\$\{|` + "`" + `\$\{)`) ) func (a *rawSQLAnalyzer) ID() string { return "static.raw_sql_interpolation" } func (a *rawSQLAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.RawSQLInterpolation } func (a *rawSQLAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return rawSQLFmtRe.MatchString(ln) || rawSQLConcatRe.MatchString(ln) }) { out = append(out, Finding{ Title: "Raw SQL interpolation", Severity: SeverityHigh, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "SQL assembled via string formatting/concatenation rather than parameterized query. Verify inputs aren't user-controlled.", SuggestedFix: "Use parameterized queries / prepared statements; pass values via driver placeholders, not string interpolation.", Source: SourceStatic, Confidence: 0.6, CheckID: a.ID(), }) } return out } // === 4. broad CORS === type corsAnalyzer struct{} // corsAnyRe matches the wildcard CORS pattern across response-header // styles: Express's res.setHeader("Access-Control-Allow-Origin", "*"), // Go's w.Header().Set(...), Python's flask responses, etc. Quotes // inside the gap (e.g. `", "*`) are tolerated. var corsAnyRe = regexp.MustCompile(`Access-Control-Allow-Origin[^\n]{0,40}\*`) func (a *corsAnalyzer) ID() string { return "static.broad_cors" } func (a *corsAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.BroadCORS } func (a *corsAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return corsAnyRe.MatchString(ln) }) { out = append(out, Finding{ Title: "Wildcard CORS", Severity: SeverityHigh, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "Access-Control-Allow-Origin: * permits cross-origin reads from any domain. Narrow to an explicit allowlist unless this endpoint is intentionally public.", Source: SourceStatic, Confidence: 0.85, CheckID: a.ID(), }) } return out } // === 5. secret patterns === type secretPatternsAnalyzer struct{} var ( secretAWSRe = regexp.MustCompile(`AKIA[0-9A-Z]{16}`) secretGenericTokenRe = regexp.MustCompile(`(?i)(?:api[_-]?key|secret|token|password)\s*[:=]\s*['"][A-Za-z0-9_\-./+=]{16,}['"]`) secretPrivateKeyRe = regexp.MustCompile(`-----BEGIN (?:RSA |EC |OPENSSH |DSA |)?PRIVATE KEY-----`) secretGitHubPATRe = regexp.MustCompile(`gh[pousr]_[A-Za-z0-9]{36,}`) secretOpenAIKeyRe = regexp.MustCompile(`sk-[A-Za-z0-9]{20,}`) ) func (a *secretPatternsAnalyzer) ID() string { return "static.secret_patterns" } func (a *secretPatternsAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.SecretPatterns } func (a *secretPatternsAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} checks := []struct { re *regexp.Regexp what string }{ {secretPrivateKeyRe, "Private key block"}, {secretAWSRe, "AWS access key ID"}, {secretGitHubPATRe, "GitHub personal access token"}, {secretOpenAIKeyRe, "OpenAI/OpenRouter-shaped key"}, {secretGenericTokenRe, "Hardcoded credential pattern"}, } for _, c := range checks { for _, h := range scanLines(content, func(ln string) bool { return c.re.MatchString(ln) }) { out = append(out, Finding{ Title: "Possible secret committed to source", Severity: SeverityCritical, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 120), // shorter to avoid leaking the secret in the report Reason: c.what + " detected. If real, rotate immediately and move to a secret store.", SuggestedFix: "Move secret to env var / secret manager; commit the .env.example with a placeholder; rotate the leaked credential.", Source: SourceStatic, Confidence: 0.75, CheckID: a.ID(), }) } } return out } // === 6. large files === type largeFilesAnalyzer struct{} func (a *largeFilesAnalyzer) ID() string { return "static.large_files" } func (a *largeFilesAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.LargeFiles } func (a *largeFilesAnalyzer) Inspect(f scanner.File, _ string, rp config.ReviewProfile) []Finding { if f.Lines == 0 || f.Lines <= rp.Limits.LargeFileLines { return nil } return []Finding{{ Title: "Large file", Severity: SeverityMedium, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("1-%d", f.Lines), Evidence: fmt.Sprintf("%d lines (limit: %d)", f.Lines, rp.Limits.LargeFileLines), Reason: "File exceeds the configured size threshold. Long files are a refactor target — split by responsibility.", Source: SourceStatic, Confidence: 1.0, // it either is or isn't over the threshold CheckID: a.ID(), }} } // === 7. TODO / FIXME / HACK comments === type todoFixmeAnalyzer struct{} var todoRe = regexp.MustCompile(`\b(?:TODO|FIXME|HACK|XXX)(?:\s*[:!(])`) func (a *todoFixmeAnalyzer) ID() string { return "static.todo_comments" } func (a *todoFixmeAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.TODOComments } func (a *todoFixmeAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return todoRe.MatchString(ln) }) { out = append(out, Finding{ Title: "TODO/FIXME comment", Severity: SeverityLow, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "Inline marker for deferred work. Audit whether the deferred concern is now blocking.", Source: SourceStatic, Confidence: 0.95, CheckID: a.ID(), }) } return out } // === 8. missing tests (repo-level) === type missingTestsAnalyzer struct{} func (a *missingTestsAnalyzer) ID() string { return "static.missing_tests" } func (a *missingTestsAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.MissingTests } func (a *missingTestsAnalyzer) Inspect(_ scanner.File, _ string, _ config.ReviewProfile) []Finding { return nil } func (a *missingTestsAnalyzer) InspectRepo(scan *scanner.Result, _ config.ReviewProfile) []Finding { if len(scan.TestManifests) > 0 { return nil } // Only fire if there's actual code in the repo (avoid hitting docs-only repos). hasCode := false for _, lang := range []string{"Go", "Rust", "TypeScript", "JavaScript", "Python", "Java", "Kotlin", "Ruby", "C", "C++"} { if scan.LanguageBreakdown[lang] > 0 { hasCode = true break } } if !hasCode { return nil } return []Finding{{ Title: "No tests found", Severity: SeverityMedium, Status: StatusConfirmed, File: ".", Evidence: "No test files or test directories detected (looked for *_test.go, *.test.{js,ts}, test_*.py, tests/, spec/)", Reason: "Repository has source code but no test surface. Refactoring or extending without test cover is high-risk.", Source: SourceStatic, Confidence: 0.95, CheckID: a.ID(), }} } // === 9. committed .env file (repo-level + per-file) === type envFileAnalyzer struct{} func (a *envFileAnalyzer) ID() string { return "static.env_file_committed" } func (a *envFileAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.SecretPatterns } func (a *envFileAnalyzer) Inspect(f scanner.File, _ string, _ config.ReviewProfile) []Finding { base := strings.ToLower(filepath.Base(f.Path)) if base != ".env" && base != ".env.local" && base != ".env.production" && base != ".env.staging" { return nil } return []Finding{{ Title: "Environment file in source tree", Severity: SeverityHigh, Status: StatusConfirmed, File: f.Path, Evidence: "filename=" + base, Reason: ".env files commonly hold real secrets and should not be tracked. If this is a sample, rename to .env.example with placeholder values.", SuggestedFix: "Rename to .env.example with placeholders; add .env to .gitignore; rotate any committed secrets.", Source: SourceStatic, Confidence: 0.9, CheckID: a.ID(), }} } // === 10. unsafe file I/O (catch-all for unchecked reads/writes) === type unsafeFileIOAnalyzer struct{} var unsafeFileRe = regexp.MustCompile(`(?:os\.WriteFile|ioutil\.WriteFile|fs\.writeFileSync|open\([^)]*['"]w['"]\)|tokio::fs::write)\([^)]*\b(?:user|input|req\.|request\.|body)\b`) func (a *unsafeFileIOAnalyzer) ID() string { return "static.unsafe_file_io" } func (a *unsafeFileIOAnalyzer) Enabled(_ config.ReviewProfile) bool { return true } // always on; cheap func (a *unsafeFileIOAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return unsafeFileRe.MatchString(ln) }) { out = append(out, Finding{ Title: "Possibly user-controlled file write", Severity: SeverityHigh, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "File-write call with a name suggesting user-supplied path/content. Confirm path traversal + content sanitization.", Source: SourceStatic, Confidence: 0.55, CheckID: a.ID(), }) } return out } // === 11. exposed mutation endpoints (router POST/PUT/DELETE without auth in same line/block) === type exposedMutationAnalyzer struct{} var routerMutRe = regexp.MustCompile(`(?:\.Post\(|\.Put\(|\.Delete\(|\.Patch\(|router\.(?:post|put|delete|patch)|app\.(?:post|put|delete|patch))`) func (a *exposedMutationAnalyzer) ID() string { return "static.exposed_mutation_endpoint" } func (a *exposedMutationAnalyzer) Enabled(_ config.ReviewProfile) bool { return true } func (a *exposedMutationAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { if content == "" { return nil } hasAuth := strings.Contains(content, "RequireAuth") || strings.Contains(content, "Bearer") || strings.Contains(content, "authMiddleware") || strings.Contains(content, "auth.Required") || strings.Contains(content, "passport.authenticate") if hasAuth { return nil // file appears to gate; per-route audit is Phase D LLM } out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return routerMutRe.MatchString(ln) }) { out = append(out, Finding{ Title: "Mutation route in file with no visible auth", Severity: SeverityMedium, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "POST/PUT/DELETE/PATCH route registered in a file with no visible auth middleware. May still be auth'd at a higher layer — confirm.", Source: SourceStatic, Confidence: 0.4, CheckID: a.ID(), }) } return out } // === 12. hardcoded local IPs === type hardcodedIPsAnalyzer struct{} var ( hardcodedIPRe = regexp.MustCompile(`(?:192\.168|10\.|172\.(?:1[6-9]|2[0-9]|3[01]))\.\d{1,3}\.\d{1,3}`) ) func (a *hardcodedIPsAnalyzer) ID() string { return "static.hardcoded_local_ip" } func (a *hardcodedIPsAnalyzer) Enabled(rp config.ReviewProfile) bool { return rp.StaticChecks.HardcodedPaths } func (a *hardcodedIPsAnalyzer) Inspect(f scanner.File, content string, _ config.ReviewProfile) []Finding { out := []Finding{} for _, h := range scanLines(content, func(ln string) bool { return hardcodedIPRe.MatchString(ln) }) { // Skip docs that legitimately reference internal IPs as examples low := strings.ToLower(f.Path) if strings.HasSuffix(low, ".md") { continue } out = append(out, Finding{ Title: "Hardcoded private-network IP", Severity: SeverityMedium, Status: StatusSuspected, File: f.Path, LineHint: fmt.Sprintf("%d", h.No), Evidence: abbrev(h.Text, 200), Reason: "RFC 1918 / link-local IP literal in source. Move to config so the binary isn't tied to one network.", Source: SourceStatic, Confidence: 0.7, CheckID: a.ID(), }) } return out }