catalogd: Step 8 — parity_subject_audit binary (Rust side)
Per docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
Cross-runtime parity helper consumed by:
golangLAKEHOUSE/scripts/cutover/parity/subject_audit_parity.sh
Two modes:
--known-answer
Print canonical-JSON + HMAC for a hardcoded fixture row. The Go
helper at golangLAKEHOUSE/scripts/cutover/parity/subject_audit_helper/
must produce byte-identical output. Catches algorithm drift
(canonical-JSON sort order, HMAC algorithm, hex encoding).
--verify <audit_log_path> --key <key_path>
Replay the chain on a real production audit log via the live
SubjectAuditWriter::verify_chain (no re-implementation; the actual
production verification path). Output: one JSON line with mode,
count, tip, verified, error.
The helper exercises the SAME verify_chain path the gateway calls, so
algorithm changes in subject_audit.rs automatically flow into the
parity probe.
Live-verified against 5 production audit logs in data/_catalog/subjects;
all 6 parity assertions pass after fixing two real cross-runtime drifts
on the Go side (omitempty trace_id stripping field; time.RFC3339Nano
stripping trailing zero in nanoseconds — both caught by this probe).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8fc6238dea
commit
2413c96817
220
crates/catalogd/src/bin/parity_subject_audit.rs
Normal file
220
crates/catalogd/src/bin/parity_subject_audit.rs
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
//! Cross-runtime parity helper for subject-audit chain.
|
||||||
|
//!
|
||||||
|
//! Specification: docs/specs/SUBJECT_MANIFESTS_ON_CATALOGD.md §5 Step 8.
|
||||||
|
//!
|
||||||
|
//! This binary is consumed by scripts/cutover/parity/subject_audit_parity.sh
|
||||||
|
//! (which lives in /home/profit/golangLAKEHOUSE/scripts/cutover/parity/).
|
||||||
|
//! Its Go counterpart is at golangLAKEHOUSE/scripts/cutover/parity/subject_audit_helper/main.go.
|
||||||
|
//!
|
||||||
|
//! Both helpers MUST produce byte-identical output for the same inputs.
|
||||||
|
//! Divergence here is a parity break — a SubjectManifest written by Rust
|
||||||
|
//! that Go cannot verify, or vice versa.
|
||||||
|
//!
|
||||||
|
//! Two modes:
|
||||||
|
//!
|
||||||
|
//! --known-answer
|
||||||
|
//! Print the canonical bytes + HMAC of a hardcoded fixture row.
|
||||||
|
//! The Go helper must produce IDENTICAL bytes + IDENTICAL hash.
|
||||||
|
//! Hardcoded fixture matches Go test TestKnownAnswerVector.
|
||||||
|
//!
|
||||||
|
//! --verify <audit_log_path> --key <key_path>
|
||||||
|
//! Parse the JSONL audit log, replay the HMAC chain. Print
|
||||||
|
//! JSON: {"mode": "verify", "count": N, "tip": "<hash|GENESIS>", "verified": bool, "error": "<msg|null>"}.
|
||||||
|
//!
|
||||||
|
//! Output format: exactly ONE JSON object on stdout, followed by a newline. The
|
||||||
|
//! parity script diffs Rust stdout vs Go stdout via `diff -q`.
|
||||||
|
|
||||||
|
use catalogd::subject_audit::SubjectAuditWriter;
|
||||||
|
use hmac::{Hmac, Mac};
|
||||||
|
use object_store::ObjectStore;
|
||||||
|
use object_store::memory::InMemory;
|
||||||
|
use serde::Serialize;
|
||||||
|
use sha2::Sha256;
|
||||||
|
use shared::types::{AuditAccessor, SubjectAuditRow};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
type HmacSha256 = Hmac<Sha256>;
|
||||||
|
|
||||||
|
const GENESIS: &str = "GENESIS";
|
||||||
|
|
||||||
|
/// Same canonical-JSON algorithm as crates/catalogd/src/subject_audit.rs.
|
||||||
|
/// Reproduced here so the helper does not depend on internal-only items
|
||||||
|
/// in that file. If the algorithm there changes, change it here AND in
|
||||||
|
/// the Go helper in lockstep.
|
||||||
|
fn canonical_json(v: &serde_json::Value) -> Vec<u8> {
|
||||||
|
fn rewrite(v: &serde_json::Value) -> serde_json::Value {
|
||||||
|
match v {
|
||||||
|
serde_json::Value::Object(map) => {
|
||||||
|
let sorted: BTreeMap<String, serde_json::Value> = map
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k.clone(), rewrite(v)))
|
||||||
|
.collect();
|
||||||
|
serde_json::Value::Object(sorted.into_iter().collect())
|
||||||
|
}
|
||||||
|
serde_json::Value::Array(arr) => {
|
||||||
|
serde_json::Value::Array(arr.iter().map(rewrite).collect())
|
||||||
|
}
|
||||||
|
other => other.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
serde_json::to_vec(&rewrite(v)).expect("canonical-json serialize")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonical_row_bytes(row: &SubjectAuditRow) -> Vec<u8> {
|
||||||
|
let mut v: serde_json::Value = serde_json::to_value(row).expect("row to value");
|
||||||
|
if let Some(obj) = v.as_object_mut() {
|
||||||
|
obj.remove("row_hmac");
|
||||||
|
}
|
||||||
|
canonical_json(&v)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_hmac(key: &[u8], prev: &str, canonical: &[u8]) -> String {
|
||||||
|
let mut mac = <HmacSha256 as Mac>::new_from_slice(key).expect("HMAC accepts any key length");
|
||||||
|
mac.update(prev.as_bytes());
|
||||||
|
mac.update(canonical);
|
||||||
|
let result = mac.finalize().into_bytes();
|
||||||
|
const HEX: &[u8; 16] = b"0123456789abcdef";
|
||||||
|
let mut s = String::with_capacity(64);
|
||||||
|
for byte in result {
|
||||||
|
s.push(HEX[(byte >> 4) as usize] as char);
|
||||||
|
s.push(HEX[(byte & 0x0f) as usize] as char);
|
||||||
|
}
|
||||||
|
s
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fixed 32-byte key used by the known-answer fixture: the byte values
/// 0 through 31 in ascending order.
fn deterministic_key() -> Vec<u8> {
    let mut key = Vec::with_capacity(32);
    key.extend(0u8..32);
    key
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct KnownAnswerOut {
|
||||||
|
mode: &'static str,
|
||||||
|
canonical: String,
|
||||||
|
hmac: String,
|
||||||
|
canonical_bytes_len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct VerifyOut {
|
||||||
|
mode: &'static str,
|
||||||
|
count: usize,
|
||||||
|
tip: String,
|
||||||
|
verified: bool,
|
||||||
|
error: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn known_answer() {
|
||||||
|
let row = SubjectAuditRow {
|
||||||
|
schema: "subject_audit.v1".into(),
|
||||||
|
ts: chrono::DateTime::parse_from_rfc3339("2026-05-03T12:00:00Z")
|
||||||
|
.unwrap()
|
||||||
|
.with_timezone(&chrono::Utc),
|
||||||
|
candidate_id: "WORKER-FIXED".into(),
|
||||||
|
accessor: AuditAccessor {
|
||||||
|
kind: "gateway_lookup".into(),
|
||||||
|
daemon: "gateway".into(),
|
||||||
|
purpose: "parity_test".into(),
|
||||||
|
trace_id: "trace-fixed".into(),
|
||||||
|
},
|
||||||
|
fields_accessed: vec!["name".into()],
|
||||||
|
result: "success".into(),
|
||||||
|
prev_chain_hash: GENESIS.into(),
|
||||||
|
row_hmac: String::new(),
|
||||||
|
};
|
||||||
|
let canon = canonical_row_bytes(&row);
|
||||||
|
let hmac = compute_hmac(&deterministic_key(), GENESIS, &canon);
|
||||||
|
let out = KnownAnswerOut {
|
||||||
|
mode: "known_answer",
|
||||||
|
canonical: String::from_utf8(canon.clone()).expect("canonical is utf-8"),
|
||||||
|
hmac,
|
||||||
|
canonical_bytes_len: canon.len(),
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&out).unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main(flavor = "current_thread")]
|
||||||
|
async fn main() {
|
||||||
|
let argv: Vec<String> = std::env::args().collect();
|
||||||
|
let mut mode_known_answer = false;
|
||||||
|
let mut audit_path: Option<PathBuf> = None;
|
||||||
|
let mut key_path: Option<PathBuf> = None;
|
||||||
|
|
||||||
|
let mut i = 1;
|
||||||
|
while i < argv.len() {
|
||||||
|
match argv[i].as_str() {
|
||||||
|
"--known-answer" => {
|
||||||
|
mode_known_answer = true;
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
"--verify" => {
|
||||||
|
audit_path = Some(PathBuf::from(
|
||||||
|
argv.get(i + 1).expect("--verify needs path"),
|
||||||
|
));
|
||||||
|
i += 2;
|
||||||
|
}
|
||||||
|
"--key" => {
|
||||||
|
key_path = Some(PathBuf::from(argv.get(i + 1).expect("--key needs path")));
|
||||||
|
i += 2;
|
||||||
|
}
|
||||||
|
"-h" | "--help" => {
|
||||||
|
eprintln!("parity_subject_audit --known-answer");
|
||||||
|
eprintln!("parity_subject_audit --verify <audit_log_path> --key <key_path>");
|
||||||
|
std::process::exit(0);
|
||||||
|
}
|
||||||
|
other => {
|
||||||
|
eprintln!("unknown arg: {other}");
|
||||||
|
std::process::exit(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if mode_known_answer {
|
||||||
|
known_answer();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let audit_path = audit_path.expect("need --known-answer OR --verify ... --key ...");
|
||||||
|
let key_path = key_path.expect("--verify also needs --key");
|
||||||
|
|
||||||
|
let key = std::fs::read(&key_path).expect("read key file");
|
||||||
|
let candidate_id = audit_path
|
||||||
|
.file_name()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.and_then(|s| s.strip_suffix(".audit.jsonl"))
|
||||||
|
.expect("audit log path must end with <candidate_id>.audit.jsonl")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// Stand up an in-memory object store, seed it with the audit log
|
||||||
|
// bytes at the canonical key, then ask SubjectAuditWriter to verify.
|
||||||
|
// This way we exercise the SAME verify_chain function the production
|
||||||
|
// gateway calls — not a re-implementation that might drift.
|
||||||
|
let store: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
|
||||||
|
let bytes = std::fs::read(&audit_path).expect("read audit log");
|
||||||
|
let log_key = format!("_catalog/subjects/{}.audit.jsonl", candidate_id);
|
||||||
|
storaged::ops::put(&store, &log_key, bytes::Bytes::from(bytes))
|
||||||
|
.await
|
||||||
|
.expect("seed object store");
|
||||||
|
|
||||||
|
let writer = SubjectAuditWriter::with_inline_key(store, key);
|
||||||
|
let result = writer.verify_chain(&candidate_id).await;
|
||||||
|
let tip = writer.chain_tip(&candidate_id).await.unwrap_or(GENESIS.into());
|
||||||
|
let out = match result {
|
||||||
|
Ok(count) => VerifyOut {
|
||||||
|
mode: "verify",
|
||||||
|
count,
|
||||||
|
tip,
|
||||||
|
verified: true,
|
||||||
|
error: None,
|
||||||
|
},
|
||||||
|
Err(e) => VerifyOut {
|
||||||
|
mode: "verify",
|
||||||
|
count: 0,
|
||||||
|
tip: GENESIS.into(),
|
||||||
|
verified: false,
|
||||||
|
error: Some(e),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
println!("{}", serde_json::to_string(&out).unwrap());
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user