gateway: /v1/validate endpoint — Phase 43 v3 part 2
Closes the Phase 43 PRD's "any caller can validate" surface. The
validator crate (FillValidator + EmailValidator + PlaybookValidator
+ WorkerLookup) is now reachable over HTTP at /v1/validate.
Request/response:
POST /v1/validate
{"kind":"fill"|"email"|"playbook", "artifact":{...}, "context":{...}?}
→ 200 + Report on success
→ 422 + ValidationError on validation failure
→ 400 on bad kind
Boot-time wiring (main.rs):
- Load workers_500k.parquet into a shared Arc<dyn WorkerLookup>
- Path overridable via LH_WORKERS_PARQUET env
- Missing file: warn + fall back to empty InMemoryWorkerLookup so the
endpoint stays live (validators just fail Consistency on every
worker-existence check, which is the correct behavior when the
roster isn't configured)
- Boot log line: "workers parquet loaded from <path>" or
"workers parquet at <path> not found"
- Live boot timing: 500K rows loaded in ~1.4s
V1State gains `validate_workers: Arc<dyn validator::WorkerLookup>`.
The `_context` JSON key is auto-injected from `request.context` so
callers can either embed `_context` directly in `artifact` or split
it cleanly via the `context` field.
Verified live (gateway + 500K worker snapshot):
POST {kind:"fill", phantom W-FAKE-99999} → 422 Consistency ("does not exist in worker roster")
POST {kind:"fill", real W-1, "Anyone"} → 200 OK + Warning ("differs from roster name 'Donald Green'")
POST {kind:"email", body has 123-45-6789} → 422 Policy ("SSN-shaped sequence")
POST {kind:"nonsense"} → 400 Bad Request
The "0→85% with iteration" thesis can now run end-to-end on real
staffing data: an executor emits a fill_proposal, posts to
/v1/validate, gets a structured ValidationError on phantom IDs or
inactive workers, observer-corrects, retries. Closure of that loop
in a scrum harness is the next commit (separate scope).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ebd9ab7c77
commit
86123fce4c
3
Cargo.lock
generated
3
Cargo.lock
generated
@ -4093,6 +4093,7 @@ dependencies = [
|
||||
"tracing-opentelemetry",
|
||||
"tracing-subscriber",
|
||||
"truth",
|
||||
"validator",
|
||||
"vectord",
|
||||
]
|
||||
|
||||
@ -8912,6 +8913,8 @@ dependencies = [
|
||||
name = "validator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow 55.2.0",
|
||||
"parquet 55.2.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
|
||||
@ -13,6 +13,7 @@ ingestd = { path = "../ingestd" }
|
||||
vectord = { path = "../vectord" }
|
||||
journald = { path = "../journald" }
|
||||
truth = { path = "../truth" }
|
||||
validator = { path = "../validator" }
|
||||
tokio = { workspace = true }
|
||||
axum = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
||||
@ -295,6 +295,30 @@ async fn main() {
|
||||
}
|
||||
k
|
||||
},
|
||||
validate_workers: {
|
||||
// Load workers_500k.parquet snapshot for /v1/validate.
|
||||
// Path overridable via LH_WORKERS_PARQUET env. Missing
|
||||
// file is non-fatal — validators run schema/PII checks
|
||||
// unaffected; only worker-existence checks fail clean.
|
||||
let path_str = std::env::var("LH_WORKERS_PARQUET")
|
||||
.unwrap_or_else(|_| "/home/profit/lakehouse/data/datasets/workers_500k.parquet".into());
|
||||
let path = std::path::Path::new(&path_str);
|
||||
if path.exists() {
|
||||
match validator::staffing::parquet_lookup::load_workers_parquet(path) {
|
||||
Ok(lookup) => {
|
||||
tracing::info!("v1: workers parquet loaded from {} — /v1/validate worker-existence checks enabled", path_str);
|
||||
lookup
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("v1: workers parquet at {} unreadable ({e}) — /v1/validate worker-existence checks will fail Consistency", path_str);
|
||||
std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("v1: workers parquet at {} not found — /v1/validate worker-existence checks will fail Consistency", path_str);
|
||||
std::sync::Arc::new(validator::InMemoryWorkerLookup::new())
|
||||
}
|
||||
},
|
||||
// Phase 40 early deliverable — Langfuse trace emitter.
|
||||
// Defaults match mcp-server/tracing.ts conventions so
|
||||
// gateway traces land in the same staffing project.
|
||||
|
||||
@ -18,6 +18,7 @@ pub mod gemini;
|
||||
pub mod claude;
|
||||
pub mod kimi;
|
||||
pub mod opencode;
|
||||
pub mod validate;
|
||||
pub mod langfuse_trace;
|
||||
pub mod mode;
|
||||
pub mod respond;
|
||||
@ -68,6 +69,15 @@ pub struct V1State {
|
||||
/// `OPENCODE_API_KEY` env or `/etc/lakehouse/opencode.env`. None =
|
||||
/// provider="opencode" calls 503.
|
||||
pub opencode_key: Option<String>,
|
||||
/// Shared WorkerLookup loaded once at startup from
|
||||
/// workers_500k.parquet (path: LH_WORKERS_PARQUET env, default
|
||||
/// data/datasets/workers_500k.parquet). Used by /v1/validate to
|
||||
/// run FillValidator/EmailValidator with worker-existence checks.
|
||||
/// Falls back to an empty InMemoryWorkerLookup if the file is
|
||||
/// missing — validators still run schema/PII checks but every
|
||||
/// worker-existence check fails (Consistency error), which is
|
||||
/// the correct behavior when the roster isn't configured.
|
||||
pub validate_workers: std::sync::Arc<dyn validator::WorkerLookup>,
|
||||
/// Phase 40 early deliverable — Langfuse client. None = tracing
|
||||
/// disabled (keys missing or container unreachable). Traces are
|
||||
/// fire-and-forget: never block the response path.
|
||||
@ -107,6 +117,7 @@ pub fn router(state: V1State) -> Router {
|
||||
.route("/mode", post(mode::route))
|
||||
.route("/mode/list", get(mode::list))
|
||||
.route("/mode/execute", post(mode::execute))
|
||||
.route("/validate", post(validate::validate))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
|
||||
82
crates/gateway/src/v1/validate.rs
Normal file
82
crates/gateway/src/v1/validate.rs
Normal file
@ -0,0 +1,82 @@
|
||||
//! /v1/validate — gateway-side artifact validation endpoint.
|
||||
//!
|
||||
//! Phase 43 v3 part 2: makes the validator crate network-callable.
|
||||
//! Any caller (scrum loop, test harness, future agent) can POST a
|
||||
//! generated artifact and get back a Report (success) or
|
||||
//! ValidationError (failure with structured field/reason).
|
||||
//!
|
||||
//! Request shape:
|
||||
//! POST /v1/validate
|
||||
//! {
|
||||
//! "kind": "fill" | "email" | "playbook",
|
||||
//! "artifact": { ... },
|
||||
//! "context": { ... } // optional — folded into artifact._context
|
||||
//! }
|
||||
//!
|
||||
//! Response on success: 200 + Report JSON
|
||||
//! Response on failure: 422 + ValidationError JSON
|
||||
//! Response on bad request: 400 + plain-text error
|
||||
//!
|
||||
//! The shared WorkerLookup is loaded once at gateway startup from
|
||||
//! workers_500k.parquet (path configurable via LH_WORKERS_PARQUET
|
||||
//! env, defaults to /home/profit/lakehouse/data/datasets/workers_500k.parquet). Falls back
|
||||
//! to an empty InMemoryWorkerLookup if the file is missing or unreadable — the
|
||||
//! validators will still run schema/length/PII checks but worker-
|
||||
//! existence checks will all fail (Consistency error), which is the
|
||||
//! correct behavior when the roster isn't configured.
|
||||
|
||||
use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
|
||||
use serde::Deserialize;
|
||||
use validator::{
|
||||
Artifact, Validator, ValidationError,
|
||||
staffing::{
|
||||
fill::FillValidator,
|
||||
email::EmailValidator,
|
||||
playbook::PlaybookValidator,
|
||||
},
|
||||
};
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct ValidateRequest {
|
||||
/// `"fill" | "email" | "playbook"` — picks which validator runs.
|
||||
pub kind: String,
|
||||
/// The artifact JSON (free-form; shape depends on `kind`).
|
||||
pub artifact: serde_json::Value,
|
||||
/// Optional context bag — merged into `artifact._context` so the
|
||||
/// validator can read fields like `target_count`, `city`,
|
||||
/// `client_id`, `candidate_id` without callers having to embed
|
||||
/// `_context` in the artifact themselves.
|
||||
#[serde(default)]
|
||||
pub context: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
pub async fn validate(
|
||||
State(state): State<super::V1State>,
|
||||
Json(req): Json<ValidateRequest>,
|
||||
) -> impl IntoResponse {
|
||||
// Merge context into artifact under `_context` so validators can
|
||||
// pull contract metadata uniformly.
|
||||
let mut artifact_value = req.artifact;
|
||||
if let Some(ctx) = req.context {
|
||||
if let Some(obj) = artifact_value.as_object_mut() {
|
||||
obj.insert("_context".to_string(), ctx);
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch.
|
||||
let workers = state.validate_workers.clone();
|
||||
let result: Result<validator::Report, ValidationError> = match req.kind.as_str() {
|
||||
"fill" => FillValidator::new(workers).validate(&Artifact::FillProposal(artifact_value)),
|
||||
"email" => EmailValidator::new(workers).validate(&Artifact::EmailDraft(artifact_value)),
|
||||
"playbook" => PlaybookValidator.validate(&Artifact::Playbook(artifact_value)),
|
||||
other => return (
|
||||
StatusCode::BAD_REQUEST,
|
||||
format!("unknown kind '{other}' — expected fill | email | playbook"),
|
||||
).into_response(),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(report) => (StatusCode::OK, Json(report)).into_response(),
|
||||
Err(e) => (StatusCode::UNPROCESSABLE_ENTITY, Json(e)).into_response(),
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user