diff --git a/crates/vectord/src/playbook_memory.rs b/crates/vectord/src/playbook_memory.rs index 5a49190..7ad6f7b 100644 --- a/crates/vectord/src/playbook_memory.rs +++ b/crates/vectord/src/playbook_memory.rs @@ -134,10 +134,87 @@ pub struct PlaybookEntry { /// full version chain. #[serde(default)] pub superseded_by: Option, + /// Phase 45 — external documentation references captured at seal + /// time. One entry per tool/library the procedure consulted. + /// Drives drift detection: when context7 reports a newer version + /// for any entry here than what's in `version_seen`, the playbook + /// is `doc_drift_flagged_at` and excluded from boost until human + /// review clears it. Legacy entries (pre-Phase-45) load with an + /// empty vec — they simply never drift-flag, same as entries + /// without a `schema_fingerprint` in Phase 25. + #[serde(default)] + pub doc_refs: Vec, + /// Phase 45 — set by `flag_doc_drift()` when one or more + /// `doc_refs` entries have a newer version available than + /// `version_seen`. Flagged entries are excluded from boost until + /// `doc_drift_reviewed_at` is set via the /resolve endpoint. + #[serde(default)] + pub doc_drift_flagged_at: Option, + /// Phase 45 — set by human operator via + /// `/vectors/playbook_memory/doc_drift/resolve/{id}` after + /// reviewing the drift diagnosis. Either re-admits the entry to + /// boost (if still applicable) or pairs with `retired_at` / + /// `superseded_by` if the procedure changed. + #[serde(default)] + pub doc_drift_reviewed_at: Option, } fn default_version() -> u32 { 1 } +/// Phase 45 — one external doc reference. Recorded at seal time so +/// drift detection knows what version was consulted. `snippet_hash` +/// lets us detect "same version, different passage" when a library +/// patches docs without bumping the version number. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct DocRef { + /// Canonical tool/library name as context7 knows it, e.g. 
+ /// "docker", "terraform", "react", "next.js". Case-insensitive + /// on compare. + pub tool: String, + /// Version string exactly as seen at seal time. Context7 typically + /// returns semver-like; we store raw string to avoid parsing + /// ambiguity ("latest", "next", "canary" are all valid). + pub version_seen: String, + /// Optional hash of the specific doc passage the procedure + /// referenced. Useful when version hasn't bumped but content + /// rewrote. + #[serde(default)] + pub snippet_hash: Option, + /// Optional direct URL back to the doc (context7 can resolve + /// tool+version → URL, so this is cache not source-of-truth). + #[serde(default)] + pub source_url: Option, + /// When this reference was captured. RFC3339. + pub seen_at: String, +} + +impl Default for PlaybookEntry { + fn default() -> Self { + Self { + playbook_id: String::new(), + operation: String::new(), + approach: String::new(), + context: String::new(), + timestamp: String::new(), + endorsed_names: Vec::new(), + city: None, + state: None, + embedding: None, + schema_fingerprint: None, + valid_until: None, + retired_at: None, + retirement_reason: None, + version: 1, + parent_id: None, + superseded_at: None, + superseded_by: None, + doc_refs: Vec::new(), + doc_drift_flagged_at: None, + doc_drift_reviewed_at: None, + } + } +} + /// A recorded failure — worker who didn't deliver on a contract. /// Tracked per (city, state, name) so a single worker's failures on /// Toledo Welder contracts don't penalize the same name in Chicago. @@ -1201,19 +1278,10 @@ pub async fn rebuild( endorsed_names: names, city, state, - embedding: None, - // Rebuild doesn't know fingerprints; historical entries - // get no retirement signal until a seed with a - // fingerprint supersedes them or the operator calls - // /retire manually. 
- schema_fingerprint: None, - valid_until: None, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + // Rebuild doesn't know fingerprints or doc_refs; + // historical entries get no drift signal until a seed + // supersedes them or /retire is called manually. + ..Default::default() } }) .collect(); @@ -1386,20 +1454,12 @@ mod tests { playbook_id: format!("pb-{i}"), operation: "fill: Welder x1 in Toledo, OH".into(), approach: "transfer".into(), - context: "".into(), timestamp: "2026-04-20".into(), endorsed_names: vec!["Deborah Powell".into()], city: Some("Toledo".into()), state: Some("OH".into()), embedding: Some(vec![1.0, 0.0, 0.0]), - schema_fingerprint: None, - valid_until: None, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + ..Default::default() }) .collect(); tokio::runtime::Runtime::new().unwrap().block_on(async { @@ -1431,12 +1491,7 @@ mod validity_window_tests { embedding: Some(vec![1.0, 0.0, 0.0]), schema_fingerprint: fingerprint, valid_until, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + ..Default::default() } } @@ -1536,14 +1591,7 @@ mod upsert_tests { city: Some("Nashville".into()), state: Some("TN".into()), embedding: Some(vec![1.0, 0.0, 0.0]), - schema_fingerprint: None, - valid_until: None, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + ..Default::default() } } @@ -1632,14 +1680,7 @@ mod version_tests { city: Some(city.into()), state: Some(state.into()), embedding: Some(vec![1.0, 0.0, 0.0]), - schema_fingerprint: None, - valid_until: None, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + ..Default::default() } } diff --git a/crates/vectord/src/service.rs 
b/crates/vectord/src/service.rs index a1f6b5b..a68ade3 100644 --- a/crates/vectord/src/service.rs +++ b/crates/vectord/src/service.rs @@ -2211,6 +2211,12 @@ struct SeedPlaybookRequest { /// retired, just inactive). Useful for seasonal/temp contracts. #[serde(default)] valid_until: Option, + /// Phase 45 — optional external doc references captured at seal + /// time. Each entry names a tool + version_seen; context7-driven + /// drift check compares against current versions later. None or + /// empty = no drift signal (never flagged). + #[serde(default)] + doc_refs: Option>, } /// Bootstrap / test-only: inject a playbook entry directly into @@ -2232,21 +2238,12 @@ async fn seed_playbook_memory( // Embed the entry through the same text shape `rebuild` uses so // similarity math is comparable across seed + real entries. let tmp_entry = playbook_memory::PlaybookEntry { - playbook_id: String::new(), operation: req.operation.clone(), approach: req.approach.clone(), context: req.context.clone(), timestamp: chrono::Utc::now().to_rfc3339(), endorsed_names: req.endorsed_names.clone(), - city: None, state: None, embedding: None, - schema_fingerprint: None, - valid_until: None, - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + ..Default::default() }; let text = format!( "{} | {} | {} | fills: {}", @@ -2304,12 +2301,11 @@ async fn seed_playbook_memory( // works). valid_until + retired_at start None. schema_fingerprint: req.schema_fingerprint.clone(), valid_until: req.valid_until.clone(), - retired_at: None, - retirement_reason: None, - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + // Phase 45 — seed request may also carry doc_refs; defaults + // empty so pre-Phase-45 callers still work and the entry + // degrades to "no drift signal" (never flagged). 
+ doc_refs: req.doc_refs.clone().unwrap_or_default(), + ..Default::default() }; // Phase 26 — when append=true (default), route through upsert so @@ -2521,6 +2517,11 @@ struct RevisePlaybookRequest { schema_fingerprint: Option, #[serde(default)] valid_until: Option, + /// Phase 45 — updated doc references. Typically a revise happens + /// BECAUSE docs drifted; pass the new versions seen so the revised + /// entry starts with fresh drift signal. + #[serde(default)] + doc_refs: Option>, } /// Phase 27 — create a new version of an existing playbook. The parent @@ -2613,14 +2614,11 @@ async fn revise_playbook_memory( embedding: Some(emb), schema_fingerprint: req.schema_fingerprint, valid_until: req.valid_until, - retired_at: None, - retirement_reason: None, - // revise_entry overwrites these from the parent — values here - // are just placeholders so the struct is well-formed. - version: 1, - parent_id: None, - superseded_at: None, - superseded_by: None, + // Phase 45 — doc_refs may be provided on revise too. + doc_refs: req.doc_refs.clone().unwrap_or_default(), + // revise_entry overwrites version / parent_id / supersession + // from the parent; other fields keep defaults. + ..Default::default() }; let outcome = state.playbook_memory.revise_entry(&req.parent_id, new_entry) diff --git a/docs/CONTROL_PLANE_PRD.md b/docs/CONTROL_PLANE_PRD.md index b30a5a4..dace45d 100644 --- a/docs/CONTROL_PLANE_PRD.md +++ b/docs/CONTROL_PLANE_PRD.md @@ -217,6 +217,38 @@ Ship each phase before starting the next. Each ends with green tests + docs upda --- +## Phase 45 — Doc-drift detection + context7 integration + +**Goal:** Playbooks know which external docs they were written against. When those docs change (Docker adds a feature, npm lib goes major, Terraform renames a resource), the playbook is automatically flagged. Small models never run confidently-outdated procedures — the drift signal reaches them before the next execution does. 
+ +**Why this phase exists at all:** The 0→85% thesis depends on the hyperfocus lane staying valid. External doc drift invalidates the lane silently — popular playbooks can compound the wrong way, accumulating boost while growing more wrong. Phase 25 already retires playbooks on *internal* schema drift; Phase 45 is the same mechanism against *external* doc drift. This is the completion of the learning loop, not an optional add-on. + +**Ships:** +- `shared::types::DocRef` — `{ tool: String, version_seen: String, snippet_hash: Option<String>, source_url: Option<String>, seen_at: DateTime }` +- `PlaybookEntry.doc_refs: Vec<DocRef>` — `#[serde(default)]` so pre-Phase-45 entries load as empty vec +- `/vectors/playbook_memory/seed` + `/revise` accept `doc_refs` in the request body +- `/vectors/playbook_memory/doc_drift/check/{id}` — manual drift check: looks up each `doc_refs[]` entry via the context7 bridge, returns per-tool `{version_seen, version_current, drifted: bool}` plus overall verdict +- `/vectors/playbook_memory/doc_drift/scan` — batch scan across all active playbooks (scheduled path for Phase 45.2) +- `mcp-server/context7_bridge.ts` — Bun HTTP bridge. Exposes `GET /docs/:tool/version` + `GET /docs/:tool/:version/diff?since=X` against the installed context7 MCP plugin. Gateway calls this over localhost. +- `PlaybookMemory::compute_boost_for_filtered_with_role` — excludes entries where `doc_drift_flagged_at.is_some() && doc_drift_reviewed_at.is_none()` (same rule as retired + superseded) +- Overview model synthesis writes `data/_kb/doc_drift_corrections.jsonl` per detected drift: `{playbook_id, tool, version_seen, version_current, diff_summary, recommended_action, generated_at}` +- Human-in-the-loop re-seal path: `/vectors/playbook_memory/doc_drift/resolve/{id}` — marks reviewed, optionally triggers `revise_entry` if procedure changed + +**Gate:** +- Seal a playbook referencing Docker 24.x → doc_refs captured. 
Bump Docker version behind the scenes → `/doc_drift/check/{id}` returns `drifted: true, version_seen: 24.0.7, version_current: 25.0.1, summary: "..."`. The boosted playbook count on next `/vectors/hybrid` query drops by 1 (drift-flagged skipped). +- `doc_drift_corrections.jsonl` contains the overview model's synthesis for the drift with at least: summary of change, recommended action, cost/impact estimate. +- Human calls `/doc_drift/resolve/{id}` after reviewing → playbook returns to active boost pool (or supersedes via Phase 27 if procedure materially changed). +- Unit tests: DocRef serde default (legacy entries load as empty), drift check against mocked context7 bridge, boost exclusion when drifted+unreviewed. + +**Non-goals (explicit):** +- Automatic re-seal without human review. Drift-detection → flag, not silent rewrite. +- Cross-playbook propagation of one drift diagnosis. Each playbook reviewed individually (aggregation later if warranted). +- Generating the updated procedure. T3 *suggests*; human or separate bot (see `bot/`) *writes*. + +**Risk:** Medium. The context7 bridge is new infrastructure (Bun ↔ context7 MCP plugin ↔ HTTP shape for gateway consumption). Mitigation: context7 plugin is already installed; its MCP tools return structured JSON; the bridge is thin adapter code. Start with single-tool drift check (Docker) before broadening. + +--- + ## Long-horizon domains (not in current phase sequence) The architecture was drafted with DevOps execution (Terraform, Ansible) as the eventual target. **That remains aspirational, not current scope** — we don't start wiring `terraform validate` / `ansible-lint` until the staffing domain proves the six-layer architecture at scale.