// Minimal gitignore-pattern matcher for the scanner. Spec subset // covering the patterns real-world .gitignore files use ~99% of the // time: // // pattern — basename match anywhere in tree // pattern/ — dir-only match (anywhere) // /pattern — anchored at repo root // pattern/sub — path-anchored (relative to repo root) // *.ext — extension glob // path/*.ext — path + extension // #comment — ignored (when at start of line) // blank line — ignored // // What's intentionally NOT supported (rare + adds complexity): // !pattern — negation. We skip the line and surface a warning // so operators know a negation didn't apply. // ** — recursive glob. Treat literally as "**" basename. // In practice the spec means "any depth" but most // real patterns use trailing /** or /pattern/** which // we approximate via path prefix match. // \ — escape char. Treated literally (rare in real // .gitignore files). // // This is enough for the Lakehouse Rust .gitignore (14 patterns, // 100% covered) and the harness's own .gitignore (12 patterns, // 100% covered). package scanner import ( "bufio" "os" "path/filepath" "strings" ) // Matcher tests whether a path should be skipped per the loaded // .gitignore. Built once per Walk; reads the file at repoPath/.gitignore. type Matcher struct { repoPath string rules []rule negationsSeen bool } type rule struct { pattern string // original pattern as written dirOnly bool // matches only directories (trailing /) anchored bool // anchored at repo root (leading / or has interior /) hasGlob bool // contains * or ? pathParts []string // for anchored multi-segment patterns } // LoadGitignore reads .gitignore at the repo root + returns a Matcher. // Missing file → empty matcher (matches nothing, behaves like the // pre-2026-04-30 scanner). func LoadGitignore(repoPath string) (*Matcher, error) { m := &Matcher{repoPath: repoPath} f, err := os.Open(filepath.Join(repoPath, ".gitignore")) if err != nil { if os.IsNotExist(err) { return m, nil } return m, err } defer f.Close() scanner := bufio.NewScanner(f) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) if line == "" || strings.HasPrefix(line, "#") { continue } if strings.HasPrefix(line, "!") { m.negationsSeen = true continue // skip negations for v0 } m.rules = append(m.rules, compileRule(line)) } return m, scanner.Err() } // HasNegations reports whether the .gitignore contained any `!pattern` // lines (not supported in v0). Callers can surface this in receipts. func (m *Matcher) HasNegations() bool { return m != nil && m.negationsSeen } // Skip reports whether path (absolute) should be excluded under the // loaded rules. isDir affects dir-only rules. Empty matcher → false // (caller falls back to other skip logic). func (m *Matcher) Skip(absPath string, isDir bool) bool { if m == nil || len(m.rules) == 0 { return false } rel, err := filepath.Rel(m.repoPath, absPath) if err != nil { return false } rel = filepath.ToSlash(rel) if rel == "." { return false } base := filepath.Base(rel) for _, r := range m.rules { if r.dirOnly && !isDir { continue } if matchRule(r, rel, base) { return true } } return false } // compileRule normalizes one .gitignore line into a Rule. func compileRule(line string) rule { r := rule{pattern: line} // Trailing slash → dir-only if strings.HasSuffix(line, "/") { r.dirOnly = true line = strings.TrimSuffix(line, "/") } // Leading slash → anchored if strings.HasPrefix(line, "/") { r.anchored = true line = strings.TrimPrefix(line, "/") } // Interior slash also anchors (per gitignore spec): // `path/foo` matches only the top-level path/foo, not path/foo // nested deeper. if strings.Contains(line, "/") { r.anchored = true } r.hasGlob = strings.ContainsAny(line, "*?[") r.pathParts = strings.Split(line, "/") r.pattern = line return r } func matchRule(r rule, rel, base string) bool { if r.anchored { // Match the whole rel path against the rule's path return matchPath(r.pathParts, strings.Split(rel, "/")) } // Unanchored — match basename, but ALSO allow subpath match for // patterns like "node_modules" that should match // "vendor/node_modules" too. if r.hasGlob { ok, _ := filepath.Match(r.pattern, base) if ok { return true } } else { if r.pattern == base { return true } // Walk path segments — any segment matching the pattern triggers. for _, seg := range strings.Split(rel, "/") { if seg == r.pattern { return true } } } return false } // matchPath compares an anchored pattern's segments against the // path's segments. Each segment compared via filepath.Match (handles // globs). Length must match exactly — trailing-slash → dir which // the caller already filtered. func matchPath(patternParts, pathParts []string) bool { if len(patternParts) > len(pathParts) { return false } // For anchored patterns, pattern matches when its segments line // up at the start of the path. Allows trailing path segments // (so /target matches target/x.go AND target itself). for i, pp := range patternParts { ok, err := filepath.Match(pp, pathParts[i]) if err != nil || !ok { return false } } return true }