Fix: UPDATE branch of upsert_entry dropped doc_refs + valid_until
All checks were successful
lakehouse/auditor all checks passed (3 findings, all info)

The auditor's hybrid fixture (branch auditor/scaffold) surfaced this
on 2026-04-22. A re-seed of the same (operation, day) pair with new
endorsed_names merged the names but silently discarded the incoming
doc_refs and valid_until fields. schema_fingerprint was partially
handled (set-if-Some) but doc_refs and valid_until weren't touched.

Root cause: the UPDATE arm of upsert_entry at playbook_memory.rs:609
only covered:
  - endorsed_names (union-merge)
  - timestamp
  - embedding (if Some)
  - schema_fingerprint (if Some)

Fix:
  - valid_until — refresh if caller provides one
  - doc_refs — merge by tool (case-insensitive). Same-tool new entry
    supersedes older one; different-tool refs are appended. Empty
    incoming doc_refs preserves existing (don't wipe on partial seed).

4 new regression tests under upsert_tests:
  - update_merges_doc_refs_with_existing_ones
  - update_same_tool_supersedes_older_version
  - update_preserves_existing_doc_refs_when_new_entry_has_none
  - update_refreshes_valid_until_when_caller_provides_one

Test result: 9/9 upsert tests pass (4 new + 5 pre-existing).

Branch basis note: this branch is off main, so the UpsertOutcome enum
here still has the newtype variants Added(String) / Noop(String). PR
#2 (fix/upsert-outcome-serde) changes that enum to struct-like. When
PR #2 merges first this branch needs a trivial rebase; the UPDATE
arm logic is untouched by that change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
profit 2026-04-22 04:06:54 -05:00
parent affab8ac83
commit 320009ddf4

View File

@ -615,15 +615,45 @@ impl PlaybookMemory {
}
existing.endorsed_names = merged.clone();
existing.timestamp = new_entry.timestamp.clone();
// Keep original playbook_id + embedding + schema fingerprint.
// Refresh embedding only if the caller passed a non-None
// one (indicates the text shape changed).
// Keep original playbook_id. Refresh embedding only
// if the caller passed a non-None one (indicates
// the text shape changed).
if new_entry.embedding.is_some() {
existing.embedding = new_entry.embedding.clone();
}
// Lifecycle / drift-signal fields — refresh only
// when the caller provides a new value. Each was
// previously either ignored entirely (valid_until,
// doc_refs) or only partially handled. Caught by
// the auditor's hybrid fixture on 2026-04-22 when
// re-seeding with fresh doc_refs silently dropped
// them on the UPDATE path.
if new_entry.schema_fingerprint.is_some() {
existing.schema_fingerprint = new_entry.schema_fingerprint.clone();
}
if new_entry.valid_until.is_some() {
existing.valid_until = new_entry.valid_until.clone();
}
if !new_entry.doc_refs.is_empty() {
// Merge by tool (case-insensitive). A new ref
// for the same tool supersedes the old one —
// caller is asserting "this is what I used
// now." Refs for tools the existing entry
// didn't track are appended. This keeps the
// cumulative tool history without letting a
// single re-seed erase it.
let mut merged_refs = existing.doc_refs.clone();
for new_ref in &new_entry.doc_refs {
let hit = merged_refs.iter().position(|r| {
r.tool.eq_ignore_ascii_case(&new_ref.tool)
});
match hit {
Some(idx) => merged_refs[idx] = new_ref.clone(),
None => merged_refs.push(new_ref.clone()),
}
}
existing.doc_refs = merged_refs;
}
let pid = existing.playbook_id.clone();
drop(state);
self.persist().await?;
@ -1662,6 +1692,117 @@ mod upsert_tests {
assert!(matches!(o, UpsertOutcome::Added(_)));
assert_eq!(pm.entry_count().await, 2);
}
// Regression test for the auditor-discovered bug: UPDATE branch
// silently dropped doc_refs, valid_until, and (partially)
// schema_fingerprint from the incoming entry.
//
// Before the fix: a re-seed with different endorsed_names AND new
// doc_refs would merge names but discard the new doc_refs. After:
// both sets are unioned (doc_refs merged by tool, same-tool
// supersedes).
fn docref(tool: &str, version: &str) -> DocRef {
DocRef {
tool: tool.into(),
version_seen: version.into(),
snippet_hash: Some(format!("hash-{tool}-{version}")),
source_url: None,
seen_at: "2026-04-22T00:00:00Z".into(),
}
}
fn mk_with_docs(op: &str, day: &str, names: &[&str], docs: Vec<DocRef>) -> PlaybookEntry {
let mut e = mk(op, day, names);
e.doc_refs = docs;
e
}
#[tokio::test]
async fn update_merges_doc_refs_with_existing_ones() {
let pm = PlaybookMemory::new(Arc::new(InMemory::new()));
let day = "2026-04-22";
let op = "fill: Welder x2 in Nashville, TN";
// First seed: Alice + Docker doc ref
let e1 = mk_with_docs(op, day, &["Alice"], vec![docref("docker", "24.0.7")]);
let o1 = pm.upsert_entry(e1).await.unwrap();
assert!(matches!(o1, UpsertOutcome::Added(_)));
// Second seed on same (op, day) but new names AND new tool ref
let e2 = mk_with_docs(op, day, &["Bob"], vec![docref("terraform", "1.5.0")]);
let o2 = pm.upsert_entry(e2).await.unwrap();
assert!(matches!(o2, UpsertOutcome::Updated { .. }));
assert_eq!(pm.entry_count().await, 1); // still one entry, merged
let snap = pm.snapshot().await;
assert_eq!(snap.len(), 1);
let merged = &snap[0];
// Names unioned
assert!(merged.endorsed_names.contains(&"Alice".to_string()));
assert!(merged.endorsed_names.contains(&"Bob".to_string()));
// Both doc_refs preserved (different tools → union)
assert_eq!(merged.doc_refs.len(), 2);
let tools: Vec<_> = merged.doc_refs.iter().map(|r| r.tool.as_str()).collect();
assert!(tools.contains(&"docker"));
assert!(tools.contains(&"terraform"));
}
#[tokio::test]
async fn update_same_tool_supersedes_older_version() {
let pm = PlaybookMemory::new(Arc::new(InMemory::new()));
let day = "2026-04-22";
let op = "fill: Welder x2 in Nashville, TN";
let e1 = mk_with_docs(op, day, &["Alice"], vec![docref("docker", "24.0.7")]);
pm.upsert_entry(e1).await.unwrap();
// Same day, different name, SAME tool newer version
let e2 = mk_with_docs(op, day, &["Bob"], vec![docref("docker", "25.0.1")]);
let o2 = pm.upsert_entry(e2).await.unwrap();
assert!(matches!(o2, UpsertOutcome::Updated { .. }));
let snap = pm.snapshot().await;
assert_eq!(snap[0].doc_refs.len(), 1, "same tool should supersede, not duplicate");
assert_eq!(snap[0].doc_refs[0].version_seen, "25.0.1");
}
#[tokio::test]
async fn update_preserves_existing_doc_refs_when_new_entry_has_none() {
let pm = PlaybookMemory::new(Arc::new(InMemory::new()));
let day = "2026-04-22";
let op = "fill: Welder x2 in Nashville, TN";
let e1 = mk_with_docs(op, day, &["Alice"], vec![docref("docker", "24.0.7")]);
pm.upsert_entry(e1).await.unwrap();
// Second seed doesn't carry doc_refs — shouldn't wipe the existing
let e2 = mk(op, day, &["Bob"]); // no doc_refs
pm.upsert_entry(e2).await.unwrap();
let snap = pm.snapshot().await;
assert_eq!(snap[0].doc_refs.len(), 1, "empty new doc_refs shouldn't wipe existing");
assert_eq!(snap[0].doc_refs[0].tool, "docker");
}
#[tokio::test]
async fn update_refreshes_valid_until_when_caller_provides_one() {
let pm = PlaybookMemory::new(Arc::new(InMemory::new()));
let day = "2026-04-22";
let op = "fill: Welder x2 in Nashville, TN";
// First seed with no valid_until
let e1 = mk(op, day, &["Alice"]);
pm.upsert_entry(e1).await.unwrap();
// Second seed with a valid_until deadline
let mut e2 = mk(op, day, &["Bob"]);
e2.valid_until = Some("2026-05-01T00:00:00Z".into());
pm.upsert_entry(e2).await.unwrap();
let snap = pm.snapshot().await;
assert_eq!(snap[0].valid_until.as_deref(), Some("2026-05-01T00:00:00Z"));
}
}
#[cfg(test)]