From 6af0520ed2520822b6b6239f6501b8f10f42262c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 29 Apr 2026 05:56:42 -0500 Subject: [PATCH] =?UTF-8?q?A:=20fail-loud=20on=20non-loopback=20bind=20?= =?UTF-8?q?=E2=80=94=20closes=20worst=20case=20of=20R-001?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shared.Run now refuses to bind a non-loopback address unless the LH__ALLOW_NONLOOPBACK=1 env is set. Single change covers all 7 binaries via the existing Run call site; no per-binary wiring needed. Closes the accidental-0.0.0.0 deploy attack surface for R-001: queryd /sql is RCE-equivalent off loopback (DuckDB has filesystem read + COPY TO + read_text), but the gate applies to every binary uniformly so the same posture covers vectord (mutation routes), catalogd (manifest writes), and the others. What passes the gate: 127.0.0.1:port, 127.x.y.z:port (full /8), [::1]:port, localhost:port, OR explicit env LH__ALLOW_NONLOOPBACK=1 What fail-louds: 0.0.0.0:port, [::]:port, :port (all interfaces), any non-loopback IP, any non-localhost hostname, unparseable shapes ("", "no port", garbage) Override env is strict equality "1" — typos like "true"/"yes" do NOT trigger it, so a future operator can't accidentally expose by typing the wrong value. Override fires log a structured warn so the choice is auditable in production. Error message cites the env name AND R-001 by name so operators see the fix path without grepping: "refusing non-loopback bind \"0.0.0.0:3214\" for \"queryd\" (set LH_QUERYD_ALLOW_NONLOOPBACK=1 to override; see audit R-001)" internal/shared/bind.go — requireLoopbackOrOverride + isLoopbackAddr internal/shared/bind_test.go — 7 test funcs incl. table-driven IPv4/IPv6/hostname coverage and per-service env isolation internal/shared/server.go — 1-line gate in Run before listen Verified: go test -short ./internal/shared/ — all green (was 14 funcs, now 21) just verify — vet + test + 9 smokes still 33s Doesn't address R-001's full attack surface (any reachable port can issue arbitrary SQL); ADR-003 + Bearer-token middleware is the follow-up. This commit makes the implicit "localhost-only is the auth layer" guarantee explicit and un-bypassable without explicit env. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/shared/bind.go | 75 ++++++++++++++++++++ internal/shared/bind_test.go | 129 +++++++++++++++++++++++++++++++++++ internal/shared/server.go | 9 +++ 3 files changed, 213 insertions(+) create mode 100644 internal/shared/bind.go create mode 100644 internal/shared/bind_test.go diff --git a/internal/shared/bind.go b/internal/shared/bind.go new file mode 100644 index 0000000..8b49f48 --- /dev/null +++ b/internal/shared/bind.go @@ -0,0 +1,75 @@ +package shared + +import ( + "fmt" + "log/slog" + "net" + "os" + "strings" +) + +// requireLoopbackOrOverride enforces that the bind address is on the +// loopback interface unless an explicit env override is set. Closes +// the worst case of audit risk R-001 (queryd /sql + DuckDB + non- +// loopback bind = RCE-equivalent for anyone who can reach the port) +// without committing to an auth model. +// +// Override env: LH__ALLOW_NONLOOPBACK=1. +// When the override fires, we log a structured warn so the choice is +// auditable in production logs. +// +// Cases that pass: +// - 127.0.0.1, 127.x.y.z (the /8), [::1], localhost +// - explicit-override env set to "1" +// +// Cases that fail-loud: +// - 0.0.0.0, [::], any non-loopback IP +// - empty host ":port" (listens on all interfaces) +// - unparseable addr +// +// The function is also useful as a unit-testable predicate; callers +// that want to gate something other than Run can call it directly. +func requireLoopbackOrOverride(serviceName, addr string) error { + if isLoopbackAddr(addr) { + return nil + } + envKey := "LH_" + strings.ToUpper(serviceName) + "_ALLOW_NONLOOPBACK" + if os.Getenv(envKey) == "1" { + slog.Warn("non-loopback bind allowed by env override", + "service", serviceName, + "addr", addr, + "env", envKey, + "hint", "audit risk R-001 — see reports/scrum/risk-register.md") + return nil + } + return fmt.Errorf("refusing non-loopback bind %q for %q "+ + "(set %s=1 to override; see audit R-001)", addr, serviceName, envKey) +} + +// isLoopbackAddr returns true iff addr's host portion is on the +// loopback interface. Covers IPv4 127.0.0.0/8, IPv6 ::1, and +// "localhost". Empty host (":port"), empty string, and any +// non-parseable addr return false. +func isLoopbackAddr(addr string) bool { + host, _, err := net.SplitHostPort(addr) + if err != nil { + // Unparseable shape — could be a bare hostname or wholly + // malformed. Rejecting protects against future changes that + // silently accept new shapes. + return false + } + if host == "" { + // ":port" listens on ALL interfaces — explicitly non-loopback. + return false + } + if host == "localhost" { + return true + } + ip := net.ParseIP(host) + if ip == nil { + // Hostname that isn't "localhost". We don't resolve DNS here + // (slow + misleading); reject so deploys must be explicit. + return false + } + return ip.IsLoopback() +} diff --git a/internal/shared/bind_test.go b/internal/shared/bind_test.go new file mode 100644 index 0000000..178b263 --- /dev/null +++ b/internal/shared/bind_test.go @@ -0,0 +1,129 @@ +package shared + +import ( + "os" + "strings" + "testing" +) + +// Closes audit R-001's worst case (accidental non-loopback deploy) +// at the predicate layer. Run integration coverage lives in the +// existing smoke chain — this file proves the rules. + +func TestIsLoopbackAddr(t *testing.T) { + cases := []struct { + name string + addr string + want bool + }{ + // Pass cases — every shape we accept. + {"127.0.0.1 standard", "127.0.0.1:3214", true}, + {"127.0.0.0/8 mid-range", "127.5.6.7:3214", true}, + {"127.255.255.254 edge", "127.255.255.254:3214", true}, + {"IPv6 loopback", "[::1]:3214", true}, + {"localhost hostname", "localhost:3214", true}, + + // Reject cases — every shape that should fail-loud. + {"empty addr", "", false}, + {"empty host (all interfaces)", ":3214", false}, + {"explicit any IPv4", "0.0.0.0:3214", false}, + {"explicit any IPv6", "[::]:3214", false}, + {"public IPv4", "8.8.8.8:3214", false}, + {"private LAN IPv4", "192.168.1.176:3214", false}, + {"hostname (not localhost)", "myhost.example.com:3214", false}, + {"missing port", "127.0.0.1", false}, + {"garbage", "not an addr", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := isLoopbackAddr(tc.addr) + if got != tc.want { + t.Errorf("isLoopbackAddr(%q) = %v, want %v", tc.addr, got, tc.want) + } + }) + } +} + +func TestRequireLoopback_AcceptsLoopback(t *testing.T) { + if err := requireLoopbackOrOverride("queryd", "127.0.0.1:3214"); err != nil { + t.Errorf("loopback should pass without env, got %v", err) + } + if err := requireLoopbackOrOverride("vectord", "[::1]:3215"); err != nil { + t.Errorf("IPv6 loopback should pass, got %v", err) + } +} + +func TestRequireLoopback_RejectsNonLoopback(t *testing.T) { + cases := []string{ + "0.0.0.0:3214", + ":3214", + "192.168.1.176:3214", + } + for _, addr := range cases { + t.Run(addr, func(t *testing.T) { + err := requireLoopbackOrOverride("queryd", addr) + if err == nil { + t.Fatalf("expected error on %q without override, got nil", addr) + } + // Error message should cite the override env so operators + // can quickly see how to opt in if intentional. + if !strings.Contains(err.Error(), "LH_QUERYD_ALLOW_NONLOOPBACK") { + t.Errorf("error should cite override env, got %q", err.Error()) + } + // And reference R-001 so the audit trail is explicit. + if !strings.Contains(err.Error(), "R-001") { + t.Errorf("error should cite R-001, got %q", err.Error()) + } + }) + } +} + +func TestRequireLoopback_OverrideEnvAllowsNonLoopback(t *testing.T) { + t.Setenv("LH_QUERYD_ALLOW_NONLOOPBACK", "1") + if err := requireLoopbackOrOverride("queryd", "0.0.0.0:3214"); err != nil { + t.Errorf("override should permit non-loopback, got %v", err) + } +} + +func TestRequireLoopback_OverrideEnvOnlyApplies_ExactValue1(t *testing.T) { + // "true", "yes", anything else != "1" should NOT trigger the + // override. Strict matching prevents silent acceptance of typos. + cases := []string{"true", "yes", "TRUE", "01", " 1", ""} + for _, val := range cases { + t.Run("val="+val, func(t *testing.T) { + t.Setenv("LH_QUERYD_ALLOW_NONLOOPBACK", val) + err := requireLoopbackOrOverride("queryd", "0.0.0.0:3214") + if err == nil { + t.Fatalf("override value %q should NOT permit non-loopback", val) + } + }) + } +} + +func TestRequireLoopback_EnvIsPerService(t *testing.T) { + // Setting queryd's override should NOT affect vectord. Each binary + // must opt in explicitly so a single-service exposure decision + // doesn't silently apply to others. + t.Setenv("LH_QUERYD_ALLOW_NONLOOPBACK", "1") + // queryd allowed: + if err := requireLoopbackOrOverride("queryd", "0.0.0.0:3214"); err != nil { + t.Errorf("queryd should be allowed, got %v", err) + } + // vectord still rejected: + if err := requireLoopbackOrOverride("vectord", "0.0.0.0:3215"); err == nil { + t.Error("vectord should still be rejected — env is per-service") + } +} + +// Sanity: the env override variable name composition. If anyone ever +// renames the prefix or casing, every cmd//main.go behavior breaks. +func TestRequireLoopback_OverrideEnvName(t *testing.T) { + // Make sure the env we expect users to set actually triggers the + // path. Helps catch a refactor that changes the prefix without + // updating docs. + defer os.Unsetenv("LH_GATEWAY_ALLOW_NONLOOPBACK") + os.Setenv("LH_GATEWAY_ALLOW_NONLOOPBACK", "1") + if err := requireLoopbackOrOverride("gateway", "0.0.0.0:3110"); err != nil { + t.Errorf("gateway override env should work, got %v", err) + } +} diff --git a/internal/shared/server.go b/internal/shared/server.go index f545614..d576ac5 100644 --- a/internal/shared/server.go +++ b/internal/shared/server.go @@ -47,7 +47,16 @@ type RegisterRoutes func(r chi.Router) // want their own slog.Default() should set it before calling Run. // (Per Kimi review #4: shared library functions shouldn't silently // mutate package globals.) +// +// Refuses to bind a non-loopback address unless the +// LH__ALLOW_NONLOOPBACK=1 env is set — closes the accidental +// 0.0.0.0 deploy path for R-001 (queryd /sql is RCE-equivalent off +// loopback, but the gate applies to every binary uniformly). func Run(serviceName, addr string, register RegisterRoutes) error { + if err := requireLoopbackOrOverride(serviceName, addr); err != nil { + return err + } + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelInfo, }))