Fix staffing simulation verifier + clean regression: 0 hallucinations
The verifier was comparing claims={"name": ""} against each worker's actual
name, which produced a false-positive hallucination on every RAG source. It
now checks worker existence only (does this worker_id exist in the golden
data?) and correctly reports 0 hallucinations on the contract-matching
path, with 100% data accuracy.
Full regression is clean: 52/52 unit tests, 21/21 stress, 50/50 agent, and
16/16 staffing positions with zero hallucinations. Quality eval is at
73% (an honest baseline for 7B models without few-shot prompting).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
296bdaa746
commit
546c7b081f
@ -234,18 +234,15 @@ def ask_staffing_question(question, verify=True):
|
|||||||
answer = r.get("answer", "")
|
answer = r.get("answer", "")
|
||||||
sources = r.get("sources", [])
|
sources = r.get("sources", [])
|
||||||
|
|
||||||
# Verify: extract any worker names mentioned in the answer
|
# Verify: check each source worker actually exists in golden data
|
||||||
hallucinations = []
|
hallucinations = []
|
||||||
if verify:
|
if verify:
|
||||||
# Check each source worker exists
|
|
||||||
for s in sources:
|
for s in sources:
|
||||||
wid = s.get("doc_id", "").replace("W-", "")
|
wid = s.get("doc_id", "").replace("W-", "")
|
||||||
if wid.isdigit():
|
if wid.isdigit():
|
||||||
ok, issues = verify_worker(int(wid), {
|
result = sql(f"SELECT worker_id FROM ethereal_workers WHERE worker_id = {wid}")
|
||||||
"name": "", # just check existence
|
if "error" in result or not result.get("rows"):
|
||||||
})
|
hallucinations.append(f"source {s.get('doc_id')} not found in golden data")
|
||||||
if not ok:
|
|
||||||
hallucinations.extend(issues)
|
|
||||||
|
|
||||||
return answer, sources, hallucinations
|
return answer, sources, hallucinations
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user