// golangLAKEHOUSE/internal/shared/config_test.go

package shared

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
)

// Closes the config.go side of R-002 — TOML loader, default values,
// missing-file warn semantics. The audit flagged "internal/shared
// has zero tests" without distinguishing server.go from config.go;
// this file covers the latter.

// TestDefaultConfig_G0Ports pins the bind address DefaultConfig returns
// for every service, plus a handful of scalar defaults, so that changing
// any of them requires a matching edit in this test.
func TestDefaultConfig_G0Ports(t *testing.T) {
	defaults := DefaultConfig()

	// Binds sit at 3110+ so the Go services can coexist with the live
	// Rust lakehouse (3100/3201-3204) while the migration is underway.
	type bindCase struct {
		label string
		got   string
		want  string
	}
	binds := []bindCase{
		{label: "gateway bind", got: defaults.Gateway.Bind, want: "127.0.0.1:3110"},
		{label: "storaged bind", got: defaults.Storaged.Bind, want: "127.0.0.1:3211"},
		{label: "catalogd bind", got: defaults.Catalogd.Bind, want: "127.0.0.1:3212"},
		{label: "ingestd bind", got: defaults.Ingestd.Bind, want: "127.0.0.1:3213"},
		{label: "queryd bind", got: defaults.Queryd.Bind, want: "127.0.0.1:3214"},
		{label: "vectord bind", got: defaults.Vectord.Bind, want: "127.0.0.1:3215"},
		{label: "embedd bind", got: defaults.Embedd.Bind, want: "127.0.0.1:3216"},
	}
	for i := range binds {
		if b := binds[i]; b.got != b.want {
			t.Errorf("%s = %q, want %q", b.label, b.got, b.want)
		}
	}

	// Documented G0 ingest cap is 256 MiB; the real-scale 500K test
	// raised it to 512, but the default itself stays at 256.
	const wantIngestCap = 256 << 20
	if got := defaults.Ingestd.MaxIngestBytes; got != wantIngestCap {
		t.Errorf("ingestd MaxIngestBytes = %d, want %d", got, wantIngestCap)
	}

	// The default embedding model is the G2 nomic-embed-text default.
	if got := defaults.Embedd.DefaultModel; got != "nomic-embed-text" {
		t.Errorf("embedd DefaultModel = %q, want nomic-embed-text", got)
	}

	// Production refresh interval, as opposed to the proof harness's
	// 500ms override.
	if got := defaults.Queryd.RefreshEvery; got != "30s" {
		t.Errorf("queryd RefreshEvery = %q, want 30s", got)
	}
}

// TestLoadConfig_EmptyPath_ReturnsDefaults checks that LoadConfig("")
// succeeds and yields the default gateway bind address.
func TestLoadConfig_EmptyPath_ReturnsDefaults(t *testing.T) {
	const wantBind = "127.0.0.1:3110"

	loaded, loadErr := LoadConfig("")
	if loadErr != nil {
		t.Fatalf("empty path should not error, got %v", loadErr)
	}

	if got := loaded.Gateway.Bind; got != wantBind {
		t.Errorf("expected default gateway bind, got %q", got)
	}
}

// TestLoadConfig_MissingFile_FallsBackToDefaults checks that pointing
// LoadConfig at a path that does not exist returns no error and leaves
// the default storaged bind address in place.
func TestLoadConfig_MissingFile_FallsBackToDefaults(t *testing.T) {
	// Per the comment in config.go: "non-empty + missing is suspicious"
	// — but the contract is to log a warn and return defaults, not
	// fail. We verify the contract; capturing the warn line is a
	// stretch for a unit test (slog default sink is os.Stderr).
	//
	// The path lives inside a fresh t.TempDir() so it is guaranteed
	// not to exist, regardless of what is on the host filesystem.
	missing := filepath.Join(t.TempDir(), "lakehouse.toml")

	cfg, err := LoadConfig(missing)
	if err != nil {
		t.Fatalf("missing file should not error, got %v", err)
	}
	if cfg.Storaged.Bind != "127.0.0.1:3211" {
		t.Errorf("expected default storaged bind on missing file, got %q", cfg.Storaged.Bind)
	}
}

// TestLoadConfig_ValidTOML_RoundTrip loads a partial config file and
// checks that the sections it names are overridden while a section it
// omits keeps its default.
func TestLoadConfig_ValidTOML_RoundTrip(t *testing.T) {
	// Only [gateway] and [s3] are present; everything else is left out
	// on purpose.
	const partial = `[gateway]
bind = "0.0.0.0:8080"

[s3]
endpoint = "http://other-minio:9000"
bucket   = "custom-bucket"
`
	tomlPath := filepath.Join(t.TempDir(), "lakehouse.toml")
	if writeErr := os.WriteFile(tomlPath, []byte(partial), 0o644); writeErr != nil {
		t.Fatalf("write config: %v", writeErr)
	}

	loaded, loadErr := LoadConfig(tomlPath)
	if loadErr != nil {
		t.Fatalf("LoadConfig: %v", loadErr)
	}

	// Overridden values.
	// NOTE(review): s3.endpoint is set in the fixture but not asserted.
	if got := loaded.Gateway.Bind; got != "0.0.0.0:8080" {
		t.Errorf("gateway.bind = %q, want 0.0.0.0:8080", got)
	}
	if got := loaded.S3.Bucket; got != "custom-bucket" {
		t.Errorf("s3.bucket = %q, want custom-bucket", got)
	}

	// A section absent from the file must still carry its default: the
	// TOML decoder does not zero fields it never saw.
	if got := loaded.Storaged.Bind; got != "127.0.0.1:3211" {
		t.Errorf("storaged.bind drifted to %q, want default 127.0.0.1:3211", got)
	}
}

// TestDefaultConfig_ModelsTier pins what Models.Resolve returns for each
// known tier under DefaultConfig, and that an unknown tier resolves to
// the empty string.
func TestDefaultConfig_ModelsTier(t *testing.T) {
	models := DefaultConfig().Models

	// Each entry is {tier, expected model}. Dropping a tier or renaming
	// a default has to show up as an edit to this list.
	tiers := [][2]string{
		{"local_fast", "qwen3.5:latest"},
		{"local_embed", "nomic-embed-text"},
		{"local_judge", "qwen3.5:latest"},
		{"cloud_judge", "kimi-k2.6:cloud"},
		{"cloud_review", "qwen3-coder:480b"},
		{"frontier_review", "openrouter/anthropic/claude-opus-4-7"},
		{"frontier_free", "opencode/claude-opus-4-7"},
	}
	for _, pair := range tiers {
		tier, want := pair[0], pair[1]
		got := models.Resolve(tier)
		if got == want {
			continue
		}
		t.Errorf("Models.Resolve(%q) = %q, want %q", tier, got, want)
	}

	// An unknown tier yields ""; falling back is the caller's job.
	unknown := models.Resolve("nonexistent")
	if unknown != "" {
		t.Errorf("Models.Resolve(nonexistent) = %q, want empty string", unknown)
	}
}

// TestModelsConfig_IsWeak checks Models.IsWeak against the default
// configuration: two local models must be reported weak, and a set of
// strong / cloud / frontier models must not be.
func TestModelsConfig_IsWeak(t *testing.T) {
	models := DefaultConfig().Models

	// The default WeakModels set is the matrix.downgrade bypass list.
	if weak := models.IsWeak("qwen3.5:latest"); !weak {
		t.Errorf("qwen3.5:latest should be weak (in default WeakModels)")
	}
	if weak := models.IsWeak("qwen3:latest"); !weak {
		t.Errorf("qwen3:latest should be weak")
	}

	// None of these may be classified as weak.
	strong := []string{
		"opencode/claude-opus-4-7",
		"openrouter/openai/gpt-5",
		"qwen3-coder:480b",
		"deepseek-v3.2",
	}
	for i := range strong {
		name := strong[i]
		if !models.IsWeak(name) {
			continue
		}
		t.Errorf("%s should NOT be weak", name)
	}
}

// TestLoadConfig_ModelsTOMLRoundTrip loads a [models] section that
// overrides one tier and the weak_models list, then checks that the
// override is visible and that an untouched tier keeps its default.
func TestLoadConfig_ModelsTOMLRoundTrip(t *testing.T) {
	const overrides = `[models]
local_judge = "custom-judge:latest"
weak_models = ["custom-judge:latest", "qwen3:latest"]
`
	tomlPath := filepath.Join(t.TempDir(), "lakehouse.toml")
	if writeErr := os.WriteFile(tomlPath, []byte(overrides), 0o644); writeErr != nil {
		t.Fatalf("write config: %v", writeErr)
	}

	loaded, loadErr := LoadConfig(tomlPath)
	if loadErr != nil {
		t.Fatalf("LoadConfig: %v", loadErr)
	}
	models := loaded.Models

	// The overridden tier and the overridden weak list both take effect.
	if got := models.LocalJudge; got != "custom-judge:latest" {
		t.Errorf("local_judge = %q, want custom-judge:latest", got)
	}
	if weak := models.IsWeak("custom-judge:latest"); !weak {
		t.Errorf("custom-judge:latest should be weak after override")
	}

	// Tiers the file does not mention keep their defaults, because the
	// TOML decoder leaves unseen fields alone. Slices are different:
	// they are replaced wholesale, so the weak_models override in the
	// fixture deliberately trades away the full default list.
	if got := models.LocalFast; got != "qwen3.5:latest" {
		t.Errorf("local_fast drifted to %q, want default qwen3.5:latest", got)
	}
}

// TestLoadConfig_AuthTokenFromEnv locks ADR-006 Decision 6.2. Production
// deploys keep the secret in /etc/lakehouse/auth.env (mode 0600), loaded
// by systemd EnvironmentFile=, and NEVER in the committed TOML. The TOML
// only names the environment variable through token_env, and the loader
// fills Token from os.Getenv. TokenEnv defaults to "AUTH_TOKEN", so the
// happy path needs no TOML configuration at all.
func TestLoadConfig_AuthTokenFromEnv(t *testing.T) {
	scenarios := []struct {
		name     string // subtest name
		envKey   string // environment variable set for the subtest
		envValue string
		toml     string // contents of the config file
		want     string // expected cfg.Auth.Token
		wantDesc string // tail of the failure message
	}{
		{
			name:     "default env name AUTH_TOKEN",
			envKey:   "AUTH_TOKEN",
			envValue: "from-default-env",
			toml:     "[auth]\nallowed_ips = []\n",
			want:     "from-default-env",
			wantDesc: "from AUTH_TOKEN env",
		},
		{
			name:     "custom env name from token_env",
			envKey:   "CUSTOM_AUTH_TOKEN",
			envValue: "from-custom-env",
			toml:     "[auth]\ntoken_env = \"CUSTOM_AUTH_TOKEN\"\n",
			want:     "from-custom-env",
			wantDesc: "from CUSTOM_AUTH_TOKEN env",
		},
		{
			// An explicit token in the TOML beats the environment: the
			// loader only falls back to the env var when Token is
			// empty, which lets local dev override prod env vars.
			name:     "explicit token wins over env",
			envKey:   "AUTH_TOKEN",
			envValue: "from-env",
			toml:     "[auth]\ntoken = \"from-toml\"\n",
			want:     "from-toml",
			wantDesc: "explicit TOML value",
		},
	}

	for _, sc := range scenarios {
		// Subtests run sequentially (no t.Parallel), so using sc inside
		// the closure is safe.
		t.Run(sc.name, func(t *testing.T) {
			t.Setenv(sc.envKey, sc.envValue)

			tomlPath := filepath.Join(t.TempDir(), "lakehouse.toml")
			if writeErr := os.WriteFile(tomlPath, []byte(sc.toml), 0o644); writeErr != nil {
				t.Fatal(writeErr)
			}

			loaded, loadErr := LoadConfig(tomlPath)
			if loadErr != nil {
				t.Fatalf("LoadConfig: %v", loadErr)
			}
			if got := loaded.Auth.Token; got != sc.want {
				t.Errorf("Token = %q, want %s", got, sc.wantDesc)
			}
		})
	}
}

// TestLoadConfig_InvalidTOML_ReturnsError feeds LoadConfig a file that
// is not valid TOML and expects an error whose text contains
// "parse config".
func TestLoadConfig_InvalidTOML_ReturnsError(t *testing.T) {
	badPath := filepath.Join(t.TempDir(), "bad.toml")
	garbage := []byte("this is = not [toml")
	if writeErr := os.WriteFile(badPath, garbage, 0o644); writeErr != nil {
		t.Fatalf("write bad config: %v", writeErr)
	}

	_, parseErr := LoadConfig(badPath)
	switch {
	case parseErr == nil:
		t.Fatal("expected parse error on malformed TOML, got nil")
	case !strings.Contains(parseErr.Error(), "parse config"):
		// The error must carry the loader's "parse config" wrapper.
		t.Errorf("error = %v, want 'parse config' wrapper", parseErr)
	}
}

// TestLoadConfig_FileButUnreadable checks that a config file which
// exists but cannot be read makes LoadConfig return an error whose text
// contains "read config".
func TestLoadConfig_FileButUnreadable(t *testing.T) {
	// The case depends on permission bits actually denying the read, so
	// it is skipped wherever they do not:
	//   - os.Geteuid reports -1 on platforms without POSIX user ids
	//     (e.g. Windows), where a 0000 mode does not block reads;
	//   - root (euid 0) can read 0000-permission files.
	// We only need this case in CI/local-dev where the test user isn't
	// root. Per memory `feedback_pkill_scope.md` J's box runs many
	// things as root; treat this as informational.
	switch euid := os.Geteuid(); {
	case euid < 0:
		t.Skip("no POSIX permission bits on this platform; skipping unreadable-file case")
	case euid == 0:
		t.Skip("root can read 0000 files; skipping unreadable-file case")
	}

	dir := t.TempDir()
	cfgPath := filepath.Join(dir, "locked.toml")
	// Mode 0o000: the file exists but nobody has read permission.
	if err := os.WriteFile(cfgPath, []byte("[gateway]\nbind=\":1\""), 0o000); err != nil {
		t.Fatalf("write: %v", err)
	}

	_, err := LoadConfig(cfgPath)
	if err == nil {
		t.Fatal("expected read error on unreadable file, got nil")
	}
	if !strings.Contains(err.Error(), "read config") {
		t.Errorf("error = %v, want 'read config' wrapper", err)
	}
}