root 1a3a82aedb validatord: coordinator session JSONL for offline analysis (B follow-up)
Closes the second half of J's 2026-05-02 multi-call observability
concern. Trace-id propagation (commit d6d2fdf) gave us the *live*
view in Langfuse; this gives us the *longitudinal* view for ad-hoc
DuckDB queries over thousands of sessions:

  "show me every session where the model produced a real candidate
   without ever needing a retry"
  "find sessions where validation rejected three times in a row"
  "first-shot success rate per model — did we feed it enough corpus?"

## What's in

internal/validator/session_log.go:
  - SessionRecord type (schema=session.iterate.v1)
  - SessionLogger writer — mutex-guarded append, best-effort posture,
    nil-safe (NewSessionLogger("") = nil = no-op on Append)
  - BuildSessionRecord helper — assembles a row from any
    iterate response/failure/infra-error combination, callable from
    other daemons that wrap iterate (cross-daemon shared schema)
  - 7 unit tests including concurrent-append safety + the three
    code paths (success / max_iter_exhausted / infra_error)
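For orientation, an accepted fill session would serialize to a row shaped roughly like this (all values invented; field names follow the SessionRecord struct in session_log.go):

```json
{
  "schema": "session.iterate.v1",
  "session_id": "trace-abc123",
  "timestamp": "2026-05-02T10:15:00Z",
  "daemon": "validatord",
  "kind": "fill",
  "model": "example-model",
  "provider": "example-provider",
  "prompt": "…",
  "iterations": 2,
  "max_iterations": 4,
  "final_verdict": "accepted",
  "attempts": [
    {"iteration": 1, "verdict_kind": "validation_failed", "error": "unknown worker id"},
    {"iteration": 2, "verdict_kind": "accepted"}
  ],
  "artifact": {"worker_id": "w-42"},
  "grounded_in_roster": true,
  "duration_ms": 1830
}
```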

cmd/validatord/main.go:
  - handlers.sessionLog field + wiring from cfg.Validatord.SessionLogPath
  - Iterate handler: build + append a SessionRecord on every call
  - rosterCheckFor("fill") closure stamps grounded_in_roster — the
    load-bearing forensic property J flagged ("we can never
    hallucinate available staff members to contracts")
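Because NewSessionLogger("") returns nil and Append is a no-op on a nil receiver, the handler wiring needs no enabled/disabled branching. A minimal self-contained sketch of that pattern (miniLogger is an invented stand-in, not the real type):

```go
package main

import "fmt"

// miniLogger mimics SessionLogger's nil-receiver posture:
// an empty path yields a nil logger, and method calls on the
// nil logger are safe no-ops.
type miniLogger struct{ path string }

func newMiniLogger(path string) *miniLogger {
	if path == "" {
		return nil // disabled: callers keep the nil and call Append anyway
	}
	return &miniLogger{path: path}
}

func (l *miniLogger) Append(row string) {
	if l == nil {
		return // no-op when logging is disabled
	}
	fmt.Println("append:", row)
}

func main() {
	off := newMiniLogger("") // disabled
	off.Append("r1")         // safe: nil receiver, prints nothing
	on := newMiniLogger("/tmp/sessions.jsonl")
	on.Append("r2")
}
```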

internal/shared/config.go + lakehouse.toml:
  - [validatord].session_log_path field; empty = disabled
  - Production: /var/lib/lakehouse/validator/sessions.jsonl
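A hedged sketch of the corresponding lakehouse.toml fragment (key name from the bullet above; exact table layout assumed):

```toml
[validatord]
# Empty string (or omitting the key) disables session logging.
session_log_path = "/var/lib/lakehouse/validator/sessions.jsonl"
```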

scripts/validatord_smoke.sh:
  - Adds a probe verifying validatord announces the session log path
    on startup. Smoke is now 6/6 (was 5/5).

docs/SESSION_LOG.md:
  - Schema reference + 5 worked DuckDB query examples including the
    "alarm" query (sessions where grounded_in_roster=false on an
    accepted fill — should always be empty; if not, something is
    bypassing FillValidator).
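The alarm condition can also be checked without DuckDB. A hedged Go sketch that scans a sessions JSONL stream for accepted fills with grounded_in_roster=false (field names follow the SessionRecord schema; the input here is synthetic):

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"strings"
)

// row holds only the fields the alarm query needs.
type row struct {
	SessionID        string `json:"session_id"`
	Kind             string `json:"kind"`
	FinalVerdict     string `json:"final_verdict"`
	GroundedInRoster *bool  `json:"grounded_in_roster"`
}

// alarms returns session IDs of accepted fills that were NOT
// grounded in the roster. This list should always be empty.
func alarms(jsonl string) []string {
	var out []string
	sc := bufio.NewScanner(strings.NewReader(jsonl))
	for sc.Scan() {
		var r row
		if json.Unmarshal(sc.Bytes(), &r) != nil {
			continue // best-effort: skip malformed rows
		}
		if r.Kind == "fill" && r.FinalVerdict == "accepted" &&
			r.GroundedInRoster != nil && !*r.GroundedInRoster {
			out = append(out, r.SessionID)
		}
	}
	return out
}

func main() {
	sample := `{"session_id":"t1","kind":"fill","final_verdict":"accepted","grounded_in_roster":true}
{"session_id":"t2","kind":"fill","final_verdict":"accepted","grounded_in_roster":false}`
	fmt.Println(alarms(sample)) // only t2 trips the alarm
}
```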

## What this is NOT

This is NOT a duplicate of replay_runs.jsonl. They're siblings:
  - replay_runs.jsonl: replay tool's per-task retrieval+model output
  - sessions.jsonl: validatord's per-iterate full retry chain +
    grounded-in-roster verdict

A single coordinator session can produce rows in both streams; the
session_id (= Langfuse trace_id) is the join key.
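The join itself is mechanical once both streams carry session_id. A self-contained Go sketch (row shapes invented for illustration; the real streams are the two JSONL files):

```go
package main

import "fmt"

// Invented stand-ins for one parsed row from each stream.
type replayRow struct{ SessionID, TaskOutput string }
type sessionRow struct{ SessionID, FinalVerdict string }

// join pairs replay output with the session verdict, keyed on the
// shared session_id (= Langfuse trace_id).
func join(replays []replayRow, sessions []sessionRow) map[string][2]string {
	verdict := make(map[string]string)
	for _, s := range sessions {
		verdict[s.SessionID] = s.FinalVerdict
	}
	out := make(map[string][2]string)
	for _, r := range replays {
		if v, ok := verdict[r.SessionID]; ok {
			out[r.SessionID] = [2]string{r.TaskOutput, v}
		}
	}
	return out
}

func main() {
	out := join(
		[]replayRow{{"t1", "draft"}},
		[]sessionRow{{"t1", "accepted"}},
	)
	fmt.Println(out["t1"][1]) // verdict joined onto the replay row
}
```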

## Layered observability now in place

  Live view:  Langfuse trace tree (X-Lakehouse-Trace-Id propagation)
              `iterate.attempt[N]` spans with prompt/raw/verdict
  Offline:    coordinator_sessions.jsonl (this commit)
              DuckDB-queryable; longitudinal forensics
  Hard gate:  FillValidator + WorkerLookup (existing)
              phantom IDs structurally rejected, never reach
              session log's grounded_in_roster=true bucket

Per the architecture invariant in STATE_OF_PLAY's DO NOT RELITIGATE
section — these layers are wired; future work targets the data, not
the wiring.

## Verification

- internal/validator: 7 new tests (session_log_test.go) — all PASS
- cmd/validatord: 3 new integration tests covering the success,
  failure, and grounded=false paths — all PASS
- validatord_smoke.sh: 6/6 PASS through gateway :3110
- Full go test ./... green across 33 packages

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 05:22:09 -05:00


package validator

import (
	"encoding/json"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"sync"
	"time"
)

// SessionRecordSchema versions the on-wire JSON shape. Bump when the
// schema changes incompatibly. Consumers (e.g. duckdb queries over
// coordinator_sessions.jsonl, scrum review tooling) check this field
// to decide whether they understand the row.
const SessionRecordSchema = "session.iterate.v1"
// SessionRecord is one row in coordinator_sessions.jsonl. Captures the
// full retry chain of a single /v1/iterate session for offline
// forensics: "show me all sessions where the validator caught a
// phantom worker" / "show me all sessions where retrieval missed."
//
// The Langfuse trace tree (see X-Lakehouse-Trace-Id propagation
// 2026-05-02) is the live view; this JSONL is the longitudinal view
// for ad-hoc DuckDB queries over thousands of sessions.
type SessionRecord struct {
	Schema           string                 `json:"schema"`
	SessionID        string                 `json:"session_id"` // = Langfuse trace_id
	Timestamp        string                 `json:"timestamp"`  // ISO 8601
	Daemon           string                 `json:"daemon"`
	Kind             string                 `json:"kind"` // fill | email | playbook
	Model            string                 `json:"model"`
	Provider         string                 `json:"provider"`
	Prompt           string                 `json:"prompt"` // truncated to 4000
	Iterations       int                    `json:"iterations"`
	MaxIterations    int                    `json:"max_iterations"`
	FinalVerdict     string                 `json:"final_verdict"` // accepted | max_iter_exhausted | infra_error
	Attempts         []SessionAttemptRecord `json:"attempts"`
	Artifact         map[string]any         `json:"artifact,omitempty"` // present on success
	GroundedInRoster *bool                  `json:"grounded_in_roster,omitempty"`
	DurationMs       int64                  `json:"duration_ms"`
}

// SessionAttemptRecord mirrors IterateAttempt but stores only the
// stable signals (iteration, verdict, error, span id). The raw model
// output is intentionally NOT captured here — it lives in the
// Langfuse span (queryable by span_id) and the iterate response. Two
// copies would let the JSONL grow unbounded on long sessions.
type SessionAttemptRecord struct {
	Iteration   int    `json:"iteration"`
	VerdictKind string `json:"verdict_kind"` // no_json | validation_failed | accepted
	Error       string `json:"error,omitempty"`
	SpanID      string `json:"span_id,omitempty"`
}
// SessionLogger appends SessionRecord rows to a JSONL file. Best-effort:
// errors are logged via slog and never returned to the caller (per the
// rest of the observability stack — never block a request because the
// session log is unhappy).
//
// nil is a valid value: NewSessionLogger("") returns nil; Append on
// a nil receiver is a no-op. Lets validatord skip the wiring entirely
// when no session log is configured.
type SessionLogger struct {
	path string
	mu   sync.Mutex
}

// NewSessionLogger constructs a logger writing to `path`. Empty path
// disables logging. Creates parent dirs lazily on first write so
// construction succeeds even when the path is on a not-yet-mounted
// volume (e.g. systemd unit ordering).
func NewSessionLogger(path string) *SessionLogger {
	if path == "" {
		return nil
	}
	return &SessionLogger{path: path}
}
// Append writes one JSONL row. Best-effort: failures are logged via
// slog and swallowed, so the caller never handles an error. Logging
// is an observability witness, not a correctness gate.
func (l *SessionLogger) Append(rec SessionRecord) {
	if l == nil {
		return
	}
	if rec.Schema == "" {
		rec.Schema = SessionRecordSchema
	}
	if rec.Timestamp == "" {
		rec.Timestamp = time.Now().UTC().Format(time.RFC3339Nano)
	}
	if rec.Daemon == "" {
		rec.Daemon = "validatord"
	}
	body, err := json.Marshal(rec)
	if err != nil {
		slog.Warn("session_log: marshal", "err", err, "session_id", rec.SessionID)
		return
	}
	body = append(body, '\n')
	l.mu.Lock()
	defer l.mu.Unlock()
	if err := os.MkdirAll(filepath.Dir(l.path), 0o755); err != nil {
		slog.Warn("session_log: mkdir", "err", err, "path", l.path)
		return
	}
	f, err := os.OpenFile(l.path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		slog.Warn("session_log: open", "err", err, "path", l.path)
		return
	}
	defer f.Close()
	if _, err := f.Write(body); err != nil {
		slog.Warn("session_log: write", "err", err, "session_id", rec.SessionID)
	}
}
// BuildSessionRecord assembles a SessionRecord from an iterate
// response/failure pair. Exactly one of resp/fail must be non-nil
// (or both nil for an infrastructure failure). Centralized here so
// validatord and any future iterate-using daemon emit the same shape.
//
// rosterCheck is called per accepted artifact to populate
// GroundedInRoster — set to nil to skip the check (e.g. for
// non-fill kinds that don't have worker IDs). Returns the shaped
// record; pass to logger.Append on the caller side so failed
// daemons don't block on their own observability layer.
func BuildSessionRecord(
	req IterateRequest,
	resp *IterateResponse,
	fail *IterateFailure,
	infraErr error,
	rosterCheck func(map[string]any) *bool,
	durationMs int64,
) SessionRecord {
	rec := SessionRecord{
		SessionID:     req.TraceID,
		Kind:          req.Kind,
		Model:         req.Model,
		Provider:      req.Provider,
		Prompt:        trim(req.Prompt, 4000),
		MaxIterations: req.MaxIterations,
		DurationMs:    durationMs,
	}
	switch {
	case resp != nil:
		rec.Iterations = resp.Iterations
		rec.FinalVerdict = "accepted"
		rec.Attempts = sessionAttemptsFromHistory(resp.History)
		rec.Artifact = resp.Artifact
		if rosterCheck != nil {
			rec.GroundedInRoster = rosterCheck(resp.Artifact)
		}
		// Keep the trace id authoritative even if the request didn't
		// supply one — the iterate response carries the resolved id.
		if rec.SessionID == "" {
			rec.SessionID = resp.TraceID
		}
	case fail != nil:
		rec.Iterations = fail.Iterations
		rec.FinalVerdict = "max_iter_exhausted"
		rec.Attempts = sessionAttemptsFromHistory(fail.History)
		if rec.SessionID == "" {
			rec.SessionID = fail.TraceID
		}
	default:
		// Infrastructure failure (chat hop crashed mid-loop, etc.).
		// We still emit a row so the failure is forensically visible —
		// otherwise long debugging sessions get harder.
		rec.FinalVerdict = "infra_error"
		if infraErr != nil {
			rec.Attempts = []SessionAttemptRecord{{
				Iteration:   0,
				VerdictKind: "infra_error",
				Error:       trim(fmt.Sprintf("%v", infraErr), 800),
			}}
		}
	}
	return rec
}

func sessionAttemptsFromHistory(h []IterateAttempt) []SessionAttemptRecord {
	out := make([]SessionAttemptRecord, len(h))
	for i, a := range h {
		out[i] = SessionAttemptRecord{
			Iteration:   a.Iteration,
			VerdictKind: a.Status.Kind,
			Error:       a.Status.Error,
			SpanID:      a.SpanID,
		}
	}
	return out
}