Add self-improving pipeline: auto-scoring, analytics, reactive refine, routing intelligence

Phase 1 — Run Quality Scoring:
- Auto-score every run in background via qwen2.5 judge (1-10)
- Thumbs up/down vote buttons on output cards
- POST /api/runs/<id>/score for user feedback
- run_saved SSE event enables vote buttons after run completes
- User votes override auto-scores (race-condition safe)
- DB: quality_score, score_method, score_metadata on team_runs

Phase 1 — Analytics Dashboard:
- GET /api/admin/analytics: score-by-mode, score-by-model, heatmap, trend
- New Analytics tab on Admin page with bar charts, heatmap table, trend sparkline
- Scoring coverage tracker (scored vs total runs)
- Model × Mode heatmap with color-coded cells

Phase 2 — Reactive Pipeline:
- _assess_stage(): orchestrator evaluates each stage's output mid-run
- _reactive_decide(): can insert/skip stages based on assessment
- Dynamic stage loop replaces fixed iteration in run_refine()
- Budget tracking prevents infinite loops (max_stages hard cap)
- Reactive decisions render as dashed notification bars between cards
- Pipeline adjusts in real-time: "Inserting VALIDATE — high severity gaps found"

Phase 3 — Cross-Run Learning:
- _build_routing_table(): queries historical scores for model×mode performance
- Best stage sequences per content_type from pipeline_runs
- Routing table cached with 30-min TTL
- Auto-Refine strategist prompt augmented with historical data
- GET /api/suggest-models?mode=X returns top 3 models for that mode

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-03-29 06:18:32 -05:00
parent c2cc211f21
commit 8ad221b41f

View File

@ -1776,16 +1776,73 @@ def get_db():
def save_run(mode, prompt, config_data, responses):
    """Persist a completed run to team_runs and kick off background auto-scoring.

    Args:
        mode: run mode name (stored and later used for analytics grouping).
        prompt: the user prompt for the run.
        config_data: run configuration (JSON-serialized into the row).
        responses: list of response dicts; each may carry "model"/"text"/"role".

    Returns:
        The new team_runs row id, or None if the insert failed. DB errors
        are logged and swallowed — saving must never break the request path.
    """
    models = list({r.get("model", "") for r in responses if r.get("model")})
    run_id = None
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                # RETURNING id gives us the row id to hand to the scorer thread.
                cur.execute(
                    "INSERT INTO team_runs (mode, prompt, config, responses, models_used) VALUES (%s, %s, %s, %s, %s) RETURNING id",
                    (mode, prompt, json.dumps(config_data), json.dumps(responses), models)
                )
                run_id = cur.fetchone()[0]
            conn.commit()
    except Exception as e:
        print(f"[DB] save_run error: {e}")
    if run_id and responses:
        # Fire-and-forget: judge-model scoring must not block the caller.
        threading.Thread(target=_auto_score_run, args=(run_id, mode, prompt, responses), daemon=True).start()
    return run_id
# ─── AUTO-SCORING ENGINE ─────────────────────────────────────
_SCORE_MODEL = "qwen2.5:latest"  # local judge model used for background grading

def _auto_score_run(run_id, mode, prompt, responses):
    """Background worker: grade a completed run (1-10) with a judge model.

    Picks the longest non-error response as representative, asks
    _SCORE_MODEL for a JSON verdict, and writes the score to team_runs.
    The UPDATE only touches rows whose score_method is NULL or 'auto',
    so a user vote recorded in the meantime is never overwritten.
    Runs in a daemon thread — every failure is logged, never raised.
    """
    try:
        candidates = [r for r in responses if r.get("role") != "error" and r.get("text")]
        if not candidates:
            return
        best = max(candidates, key=lambda r: len(r.get("text", "")))
        text = best["text"][:3000]  # cap judge input to keep latency bounded
        judge_prompt = (
            f"Rate the quality of this AI response on a scale of 1-10.\n"
            f"Consider: relevance to the prompt, completeness, accuracy, clarity, usefulness.\n\n"
            f"PROMPT: {prompt[:500]}\n\n"
            f"MODE: {mode}\n\n"
            f"RESPONSE:\n{text}\n\n"
            f"Return ONLY a JSON object: {{\"score\": N, \"reason\": \"one sentence\"}}"
        )
        judgment = query_model(_SCORE_MODEL, judge_prompt)
        score = _parse_judge_score(judgment)
        if score is None:
            return
        with get_db() as conn:
            with conn.cursor() as cur:
                # score_method guard keeps user votes authoritative over auto-scores.
                cur.execute(
                    "UPDATE team_runs SET quality_score = %s, score_method = 'auto', score_metadata = %s WHERE id = %s AND (score_method IS NULL OR score_method = 'auto')",
                    (score, json.dumps({"judge": _SCORE_MODEL, "judgment": judgment[:500], "scored_model": best.get("model", ""), "reason": judgment[:200]}), run_id)
                )
            conn.commit()
        print(f"[SCORE] run {run_id} scored {score}/10 by {_SCORE_MODEL}")
    except Exception as e:
        print(f"[SCORE] auto-score error for run {run_id}: {e}")


def _parse_judge_score(judgment):
    """Extract a 1-10 score from judge-model output.

    Tries the JSON object the prompt requested first; on failure falls back
    to a digit adjacent to the word "score" (so stray numbers in the
    free-text reason, e.g. "3 issues", are not mistaken for the score),
    then to the first bare 1-10 anywhere in the text.

    Returns:
        float score in [1, 10], or None if nothing parseable/in range.
    """
    score = None
    try:
        j_start = judgment.find("{")
        j_end = judgment.rfind("}") + 1
        if j_start >= 0 and j_end > j_start:
            parsed = json.loads(judgment[j_start:j_end])
            score = float(parsed.get("score", 0))
    except Exception:
        score = None
    if score is None:
        m = (re.search(r'score\D{0,10}?\b(10|[1-9])\b', judgment, re.IGNORECASE)
             or re.search(r'\b(10|[1-9])\b', judgment))
        score = float(m.group(1)) if m else None
    if score is None or score < 1 or score > 10:
        return None
    return score
HTML = r"""
<!DOCTYPE html>
@ -2881,6 +2938,7 @@ function buildConfig() {
let _runStartTime = 0;
let _runTimer = null;
let _lastRunId = null;
let _runEventCount = 0;
let _runResponseCount = 0;
let _runTotalChars = 0;
@ -3132,6 +3190,11 @@ function handleEvent(evt) {
return;
}
if (evt.type === 'done') { const bar = output.querySelector('.status-bar'); if (bar) bar.remove(); return; }
if (evt.type === 'run_saved') {
_lastRunId = evt.run_id;
document.querySelectorAll('.vote-btn').forEach(function(b) { b.disabled = false; });
return;
}
if (evt.type === 'response') {
_runResponseCount++;
_runTotalChars += (evt.text || '').length;
@ -3150,6 +3213,14 @@ function handleEvent(evt) {
label.textContent = phaseName;
output.appendChild(label);
}
// Reactive pipeline notification not a full card
if (evt.role === 'reactive') {
var note = document.createElement('div');
note.style.cssText = 'font-family:JetBrains Mono,monospace;font-size:10px;color:var(--accent);border:1px dashed var(--accent);border-radius:2px;padding:8px 12px;margin:4px 0;opacity:0.8;font-style:italic';
note.textContent = '\u26A1 ' + evt.text;
output.appendChild(note);
return;
}
const mi = availableModels.findIndex(m => m.name === evt.model);
const color = COLORS[(mi >= 0 ? mi : 0) % COLORS.length];
const displayName = mi >= 0 ? (availableModels[mi].display_name || evt.model) : evt.model;
@ -3161,7 +3232,7 @@ function handleEvent(evt) {
const roleTag = evt.role ? `<span class="role-tag">${evt.role}</span>` : '';
const uid = 'resp-' + Date.now() + '-' + Math.random().toString(36).substr(2,4);
const errorLink = isError ? `<a class="error-link" href="/admin/monitor">View error details in monitor </a>` : '';
card.innerHTML = `<div class="card-header" style="cursor:pointer" onclick="openRepipe('${uid}')"><div class="dot" style="background:${isError ? 'var(--red)' : color}"></div>${displayName}${roleTag}</div><div class="card-body" id="${uid}">${escapeHtml(evt.text)}</div>${errorLink}<div class="card-actions"><button class="card-act" onclick="event.stopPropagation();copyCard('${uid}',this)">Copy</button><button class="card-act" onclick="event.stopPropagation();useAsPrompt('${uid}')">Use as Prompt</button><button class="card-act" onclick="event.stopPropagation();openRepipe('${uid}')">Iterate</button></div>`;
card.innerHTML = `<div class="card-header" style="cursor:pointer" onclick="openRepipe('${uid}')"><div class="dot" style="background:${isError ? 'var(--red)' : color}"></div>${displayName}${roleTag}</div><div class="card-body" id="${uid}">${escapeHtml(evt.text)}</div>${errorLink}<div class="card-actions"><button class="card-act" onclick="event.stopPropagation();copyCard('${uid}',this)">Copy</button><button class="card-act" onclick="event.stopPropagation();useAsPrompt('${uid}')">Use as Prompt</button><button class="card-act" onclick="event.stopPropagation();openRepipe('${uid}')">Iterate</button><span style="flex:1"></span><button class="card-act vote-btn" disabled onclick="event.stopPropagation();voteRun(this,'up')" title="Good output">\u{1F44D}</button><button class="card-act vote-btn" disabled onclick="event.stopPropagation();voteRun(this,'down')" title="Bad output">\u{1F44E}</button></div>`;
card.dataset.model = evt.model;
card.dataset.role = evt.role || '';
card.dataset.displayName = displayName;
@ -3174,6 +3245,22 @@ function handleEvent(evt) {
function escapeHtml(t) { return t.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
// CARD ACTIONS
// Record a thumbs up/down vote for the most recently saved run.
// On success, locks both vote buttons on the card and tints the chosen
// one green (up) or red (down). Network failures are logged only.
async function voteRun(btn, vote) {
  if (!_lastRunId) return;
  const accent = vote === 'up' ? 'var(--green)' : 'var(--red)';
  try {
    const resp = await fetch('/api/runs/' + _lastRunId + '/score', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({vote: vote})
    });
    if (!resp.ok) return;
    const siblings = btn.closest('.card-actions').querySelectorAll('.vote-btn');
    siblings.forEach(function(b) {
      b.style.opacity = '0.3';
      b.disabled = true;
    });
    btn.style.opacity = '1';
    btn.style.borderColor = accent;
    btn.style.color = accent;
  } catch (err) { console.error('Vote error:', err); }
}
function copyCard(uid, btn) {
const el = document.getElementById(uid);
if (!el) return;
@ -3584,6 +3671,7 @@ ADMIN_HTML = r"""
<div class="tab" onclick="switchTab('openrouter')">OpenRouter</div>
<div class="tab" onclick="switchTab('timeouts')">Timeouts</div>
<div class="tab" onclick="switchTab('security')">Security</div>
<div class="tab" onclick="switchTab('analytics')">Analytics</div>
</div>
<!-- PROVIDERS TAB -->
@ -3687,6 +3775,32 @@ ADMIN_HTML = r"""
<div id="allowlist"></div>
</div>
</div>
<!-- ANALYTICS TAB -->
<div id="tab-analytics" class="tab-content">
<div class="card" id="ana-coverage-card">
<h3>Scoring Coverage</h3>
<div id="ana-coverage" style="font-size:13px;color:var(--text2)">Loading...</div>
</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:12px">
<div class="card">
<h3>Score by Mode</h3>
<div id="ana-by-mode" style="font-size:12px">Loading...</div>
</div>
<div class="card">
<h3>Score by Model</h3>
<div id="ana-by-model" style="font-size:12px">Loading...</div>
</div>
</div>
<div class="card">
<h3>Model × Mode Heatmap</h3>
<div id="ana-heatmap" style="font-size:11px;overflow-x:auto">Loading...</div>
</div>
<div class="card">
<h3>Score Trend (30 days)</h3>
<div id="ana-trend" style="font-size:12px">Loading...</div>
</div>
</div>
</div>
<script>
@ -3918,6 +4032,91 @@ function switchTab(name) {
if (name === 'timeouts') renderTimeouts();
if (name === 'models') { loadOllamaModels(); renderCloudModels(); }
if (name === 'security') { loadDemoStatus(); loadAllowlist(); }
if (name === 'analytics') loadAnalytics();
}
// Populate the admin Analytics tab from /api/admin/analytics:
// scoring coverage, score-by-mode and score-by-model bar lists,
// the model x mode heatmap, and the 30-day trend sparkline.
// All DB-derived strings (mode/model names, dates, scores) are
// HTML-escaped before interpolation into innerHTML, so a hostile
// model name cannot inject markup into the admin page.
async function loadAnalytics() {
  function esc(s) { return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;'); }
  try {
    var r = await fetch('/api/admin/analytics');
    var d = await r.json();
    if (d.error) { document.getElementById('ana-coverage').textContent = 'Error: ' + d.error; return; }
    // Coverage: scored vs total runs
    var cov = d.coverage || {};
    document.getElementById('ana-coverage').innerHTML =
      '<strong>' + (cov.scored||0) + '</strong> / ' + (cov.total||0) + ' runs scored (' +
      (cov.total ? Math.round((cov.scored||0)/(cov.total)*100) : 0) + '%)';
    // Score by Mode - horizontal bars (bar width = score * 10%)
    var modeHtml = '';
    (d.by_mode||[]).forEach(function(m) {
      var pct = Math.round((parseFloat(m.avg_score)||0) * 10);
      modeHtml += '<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px">' +
        '<span style="min-width:100px;font-family:JetBrains Mono,monospace;font-size:10px;text-transform:uppercase">' + esc(m.mode) + '</span>' +
        '<div style="flex:1;height:14px;background:rgba(0,0,0,0.15);border-radius:2px;overflow:hidden">' +
        '<div style="width:' + pct + '%;height:100%;background:var(--accent);border-radius:2px"></div></div>' +
        '<span style="font-family:JetBrains Mono,monospace;font-size:11px;font-weight:700;min-width:30px">' + esc(m.avg_score) + '</span>' +
        '<span style="font-size:9px;color:var(--text2)">' + m.runs + ' runs</span></div>';
    });
    document.getElementById('ana-by-mode').innerHTML = modeHtml || '<span style="color:var(--text2)">No scored runs yet</span>';
    // Score by Model
    var modelHtml = '';
    (d.by_model||[]).forEach(function(m) {
      var pct = Math.round((parseFloat(m.avg_score)||0) * 10);
      var name = m.model.length > 20 ? m.model.substring(0,18) + '..' : m.model;
      modelHtml += '<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px">' +
        '<span style="min-width:120px;font-family:JetBrains Mono,monospace;font-size:10px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap">' + esc(name) + '</span>' +
        '<div style="flex:1;height:14px;background:rgba(0,0,0,0.15);border-radius:2px;overflow:hidden">' +
        '<div style="width:' + pct + '%;height:100%;background:var(--green);border-radius:2px"></div></div>' +
        '<span style="font-family:JetBrains Mono,monospace;font-size:11px;font-weight:700;min-width:30px">' + esc(m.avg_score) + '</span>' +
        '<span style="font-size:9px;color:var(--text2)">' + m.runs + '</span></div>';
    });
    document.getElementById('ana-by-model').innerHTML = modelHtml || '<span style="color:var(--text2)">No data yet</span>';
    // Heatmap - one cell per model x mode pair (server requires >= 2 runs per cell)
    var hm = d.heatmap || [];
    if (hm.length) {
      var models = [...new Set(hm.map(function(h){return h.model}))];
      var modes = [...new Set(hm.map(function(h){return h.mode}))];
      var lookup = {};
      hm.forEach(function(h) { lookup[h.mode+'|'+h.model] = h.avg_score; });
      var tbl = '<table style="width:100%;border-collapse:collapse;font-family:JetBrains Mono,monospace;font-size:10px"><tr><th style="text-align:left;padding:4px">Mode</th>';
      models.forEach(function(m) { tbl += '<th style="padding:4px;text-align:center">' + esc(m.length>12?m.substring(0,10)+'..':m) + '</th>'; });
      tbl += '</tr>';
      modes.forEach(function(mode) {
        tbl += '<tr><td style="padding:4px;text-transform:uppercase;letter-spacing:0.5px">' + esc(mode) + '</td>';
        models.forEach(function(model) {
          var score = lookup[mode+'|'+model];
          // Cell tint: green >=7, amber >=5, red below; alpha scales with score.
          var bg = score ? 'rgba(' + (score >= 7 ? '74,222,128' : score >= 5 ? '226,181,90' : '224,82,82') + ',' + (parseFloat(score)/15) + ')' : 'transparent';
          tbl += '<td style="padding:4px;text-align:center;background:' + bg + ';font-weight:700">' + (score ? esc(score) : '-') + '</td>';
        });
        tbl += '</tr>';
      });
      tbl += '</table>';
      document.getElementById('ana-heatmap').innerHTML = tbl;
    } else {
      document.getElementById('ana-heatmap').innerHTML = '<span style="color:var(--text2)">Need 2+ scored runs per model/mode combination</span>';
    }
    // Trend: one bar per day; height ~ run count, color ~ avg score
    var trend = d.trend || [];
    if (trend.length) {
      var trendHtml = '<div style="display:flex;align-items:flex-end;gap:2px;height:80px">';
      var maxRuns = Math.max(...trend.map(function(t){return t.runs}));
      trend.forEach(function(t) {
        var h = Math.max(4, Math.round((t.runs/maxRuns)*70));
        var color = parseFloat(t.avg_score) >= 7 ? 'var(--green)' : parseFloat(t.avg_score) >= 5 ? 'var(--accent)' : 'var(--red)';
        trendHtml += '<div title="' + esc(t.day) + ': ' + esc(t.avg_score) + ' avg (' + t.runs + ' runs)" style="flex:1;height:' + h + 'px;background:' + color + ';border-radius:1px;min-width:4px"></div>';
      });
      trendHtml += '</div><div style="display:flex;justify-content:space-between;font-size:8px;color:var(--text2);margin-top:4px"><span>' + esc(trend[0].day) + '</span><span>' + esc(trend[trend.length-1].day) + '</span></div>';
      document.getElementById('ana-trend').innerHTML = trendHtml;
    } else {
      document.getElementById('ana-trend').innerHTML = '<span style="color:var(--text2)">No data in last 30 days</span>';
    }
  } catch(e) {
    document.getElementById('ana-coverage').textContent = 'Error loading analytics: ' + e.message;
  }
}
async function loadDemoStatus() {
@ -5795,6 +5994,63 @@ def admin_mass_ban():
# ─── ADMIN MONITOR ─────────────────────────────────────────────
@app.route("/api/admin/analytics")
@admin_required
def admin_analytics():
    """Analytics: score-by-mode, score-by-model, heatmap, trend.

    Returns JSON with five sections, all derived from team_runs.quality_score:
      by_mode  — runs / avg / stddev per mode (scored runs only)
      by_model — runs / avg per individual model; unnest(models_used) fans a
                 multi-model run out so its score credits every participant
      heatmap  — mode x model cells, only pairs with >= 2 scored runs
      trend    — daily run count and avg score for the last 30 days
      coverage — scored vs total counts over non-archived runs
    On any DB error, responds {"error": ...} with HTTP 500.
    """
    try:
        with get_db() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                # Per-mode quality: average and spread, best-scoring modes first.
                cur.execute("""
                    SELECT mode, COUNT(*) as runs, ROUND(AVG(quality_score)::numeric, 2) as avg_score,
                    ROUND(STDDEV(quality_score)::numeric, 2) as std_score
                    FROM team_runs WHERE quality_score IS NOT NULL
                    GROUP BY mode ORDER BY avg_score DESC
                """)
                by_mode = [dict(r) for r in cur.fetchall()]
                # Per-model quality across all modes.
                cur.execute("""
                    SELECT m as model, COUNT(*) as runs, ROUND(AVG(quality_score)::numeric, 2) as avg_score
                    FROM team_runs, unnest(models_used) as m
                    WHERE quality_score IS NOT NULL
                    GROUP BY m ORDER BY avg_score DESC
                """)
                by_model = [dict(r) for r in cur.fetchall()]
                # Heatmap cells; HAVING >= 2 filters single-sample noise.
                cur.execute("""
                    SELECT mode, m as model, COUNT(*) as runs, ROUND(AVG(quality_score)::numeric, 2) as avg_score
                    FROM team_runs, unnest(models_used) as m
                    WHERE quality_score IS NOT NULL
                    GROUP BY mode, m HAVING COUNT(*) >= 2
                    ORDER BY avg_score DESC
                """)
                heatmap = [dict(r) for r in cur.fetchall()]
                # Daily trend; dates are stringified so jsonify can serialize them.
                cur.execute("""
                    SELECT DATE(created_at) as day, COUNT(*) as runs, ROUND(AVG(quality_score)::numeric, 2) as avg_score
                    FROM team_runs WHERE quality_score IS NOT NULL AND created_at > NOW() - INTERVAL '30 days'
                    GROUP BY DATE(created_at) ORDER BY day
                """)
                trend = [{"day": str(r["day"]), "runs": r["runs"], "avg_score": float(r["avg_score"])} for r in cur.fetchall()]
                # NOTE(review): coverage excludes archived runs, while the score
                # aggregates above include them — confirm this asymmetry is intended.
                cur.execute("SELECT COUNT(*) as total, COUNT(quality_score) as scored FROM team_runs WHERE archived = false")
                coverage = dict(cur.fetchone())
                return jsonify({"by_mode": by_mode, "by_model": by_model, "heatmap": heatmap, "trend": trend, "coverage": coverage})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route("/api/suggest-models")
@login_required
def suggest_models():
    """Return top-performing models for a given mode based on historical scores."""
    requested_mode = request.args.get("mode", "")
    # Routing table is cached with a TTL, so this stays cheap per request.
    table = _build_routing_table()
    ranked = table.get("model_perf", {}).get(requested_mode, [])
    top_three = ranked[:3]
    return jsonify({"mode": requested_mode, "suggestions": top_three})
@app.route("/admin/monitor")
@admin_required
def monitor_page():
@ -6159,6 +6415,29 @@ def delete_run(run_id):
return jsonify({"error": str(e)}), 500
@app.route("/api/runs/<int:run_id>/score", methods=["POST"])
@login_required
def score_run(run_id):
    """User thumbs up/down on a run — overrides any auto-score.

    Maps up -> 8.0 and down -> 3.0 so user feedback lands on the same
    1-10 scale the judge model uses. score_method becomes
    'user_up'/'user_down', which the auto-scorer's UPDATE guard treats
    as final (it only rewrites NULL or 'auto' rows).

    Returns:
        {"ok": True, "score": ..., "method": ...} on success,
        400 on a bad vote value, 500 on DB failure.
    """
    data = request.json or {}
    vote = data.get("vote")
    if vote not in ("up", "down"):
        return jsonify({"error": "vote must be 'up' or 'down'"}), 400
    score = 8.0 if vote == "up" else 3.0
    method = f"user_{vote}"
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                # COALESCE is required: in Postgres, NULL || jsonb is NULL,
                # which would silently drop the vote metadata the first time
                # a not-yet-scored run (score_metadata IS NULL) is voted on.
                cur.execute(
                    "UPDATE team_runs SET quality_score = %s, score_method = %s, score_metadata = COALESCE(score_metadata, '{}'::jsonb) || %s::jsonb WHERE id = %s",
                    (score, method, json.dumps({"user": session.get("username", "unknown"), "voted_at": time.time()}), run_id)
                )
            conn.commit()
        return jsonify({"ok": True, "score": score, "method": method})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route("/api/runs/<int:run_id>/archive", methods=["POST"])
@login_required
def archive_run(run_id):
@ -7537,7 +7816,9 @@ def run_team():
_log_run(dict(run, run_id=run_id))
_active_runs.pop(run_id, None)
if collected:
save_run(mode, config.get("prompt", ""), config, collected)
rid = save_run(mode, config.get("prompt", ""), config, collected)
if rid:
yield sse({"type": "run_saved", "run_id": rid})
return Response(generate(), mimetype="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no", "Connection": "keep-alive"})
@ -8533,6 +8814,105 @@ def run_extract(config):
_save_pipeline("extract", prompt or source, steps, result_data, all_models, start)
# ─── CROSS-RUN LEARNING ───────────────────────────────────────
_routing_table = {}      # cached {"model_perf": ..., "stage_perf": ...}
_routing_table_ts = 0    # epoch seconds of last successful rebuild
_ROUTING_TTL = 1800      # 30 minutes

def _build_routing_table():
    """Build (and cache) routing intelligence from historical scored runs.

    Returns:
        {"model_perf": {mode: [{"model", "avg_score", "runs"}, ...]},
         "stage_perf": {content_type: {"stages": ..., "runs": ...}}}
        Results are cached for _ROUTING_TTL seconds. On query failure the
        previous (possibly stale or empty) table is returned and the error
        is logged — callers always get a dict.
    """
    global _routing_table, _routing_table_ts
    now = time.time()
    # Gate on the timestamp rather than the table contents, so an
    # empty-but-fresh result (no scored runs yet) is also cached instead
    # of re-querying the DB on every call.
    if _routing_table_ts and (now - _routing_table_ts) < _ROUTING_TTL:
        return _routing_table
    try:
        with get_db() as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
                # Best models per mode: only runs scoring >= 5, and only
                # model/mode pairs with at least 2 samples.
                cur.execute("""
                    SELECT mode, m as model, ROUND(AVG(quality_score)::numeric, 2) as avg_score, COUNT(*) as runs
                    FROM team_runs, unnest(models_used) as m
                    WHERE quality_score IS NOT NULL AND quality_score >= 5
                    GROUP BY mode, m HAVING COUNT(*) >= 2
                    ORDER BY mode, avg_score DESC
                """)
                model_perf = {}
                for r in cur.fetchall():
                    model_perf.setdefault(r["mode"], []).append(
                        {"model": r["model"], "avg_score": float(r["avg_score"]), "runs": r["runs"]}
                    )
                # Stage sequences used for refine pipelines, most popular first.
                cur.execute("""
                    SELECT result->>'content_type' as content_type,
                    result->'stages_run' as stages,
                    COUNT(*) as runs
                    FROM pipeline_runs
                    WHERE pipeline = 'refine' AND result->>'content_type' IS NOT NULL
                    GROUP BY result->>'content_type', result->'stages_run'
                    ORDER BY runs DESC
                """)
                stage_perf = {}
                for r in cur.fetchall():
                    ct = r["content_type"]
                    # Rows arrive ordered by popularity: keep only the first
                    # (most frequent) sequence per content type.
                    if ct and ct not in stage_perf:
                        stage_perf[ct] = {"stages": r["stages"], "runs": r["runs"]}
                _routing_table = {"model_perf": model_perf, "stage_perf": stage_perf}
                _routing_table_ts = now
    except Exception as e:
        print(f"[ROUTING] build error: {e}")
    return _routing_table
def _assess_stage(orchestrator, stage_name, stage_output, content_type):
    """Ask the orchestrator model to assess a stage's output mid-run.

    Returns a normalized dict the reactive loop can rely on:
      confidence (float), gaps (list[str]),
      severity ('low'|'medium'|'high'), suggest_stage (str|None).
    The model's JSON is untrusted — every field is type-coerced, and any
    query/parse failure yields a neutral assessment so the pipeline
    degrades to 'continue' instead of crashing.
    """
    assess_prompt = (
        f"You just reviewed the output of a {stage_name} stage on a {content_type}.\n"
        f"Assess the output briefly. Return ONLY a JSON object:\n"
        f'{{"confidence": 0.0-1.0, "gaps": ["gap1", "gap2"], "severity": "low|medium|high", "suggest_stage": null}}\n\n'
        f"If the output reveals a critical problem that needs a specific follow-up stage, set suggest_stage to one of: "
        f"VALIDATE, CRITIQUE, EXPAND, STRUCTURE, STAKEHOLDER, CLARITY, EDGE_CASES, ALIGN\n"
        f"Otherwise leave suggest_stage as null.\n\n"
        f"OUTPUT TO ASSESS:\n{stage_output[:2000]}"
    )
    fallback = {"confidence": 0.5, "gaps": [], "severity": "low", "suggest_stage": None}
    try:
        raw = safe_query(orchestrator, assess_prompt)
        text = raw.strip()
        j_start = text.find("{")
        j_end = text.rfind("}") + 1
        if j_start < 0 or j_end <= j_start:
            return fallback
        parsed = json.loads(text[j_start:j_end])
        # Coerce each field to the type _reactive_decide expects; a string
        # confidence or null gaps would otherwise crash the decision logic.
        try:
            confidence = float(parsed.get("confidence", 0.5))
        except (TypeError, ValueError):
            confidence = 0.5
        gaps = parsed.get("gaps")
        if not isinstance(gaps, list):
            gaps = []
        severity = parsed.get("severity")
        if severity not in ("low", "medium", "high"):
            severity = "low"
        suggest = parsed.get("suggest_stage")
        if not isinstance(suggest, str) or not suggest:
            suggest = None
        return {"confidence": confidence, "gaps": [str(g) for g in gaps],
                "severity": severity, "suggest_stage": suggest}
    except Exception:
        return fallback
def _reactive_decide(assessment, remaining_stages, stages_executed, max_stages):
"""Decide whether to insert, skip, or continue based on assessment."""
budget_left = max_stages - stages_executed
if budget_left <= 1:
return "continue", None, "budget exhausted"
suggested = assessment.get("suggest_stage")
severity = assessment.get("severity", "low")
confidence = assessment.get("confidence", 0.5)
# Insert a stage if the assessment suggests one and it's not already planned
if suggested and suggested not in remaining_stages and severity in ("medium", "high") and budget_left >= 2:
return "insert", suggested, f"{severity} severity — {', '.join(assessment.get('gaps', [])[:2])}"
# Skip next stage if confidence is very high and remaining stage seems redundant
if confidence > 0.9 and len(remaining_stages) > 1 and severity == "low":
next_stage = remaining_stages[0]
# Don't skip synthesis-oriented stages
if next_stage in ("EXPAND", "CLARITY"):
return "skip", next_stage, f"high confidence ({confidence:.0%}) — {next_stage} likely unnecessary"
return "continue", None, None
def run_refine(config):
"""Auto-Refine: AI analyzes content, selects the best sequence of modes, executes them, synthesizes final version."""
import time
@ -8549,10 +8929,20 @@ def run_refine(config):
yield sse({"type": "status", "message": "Analyzing content and planning refinement pipeline..."})
yield sse({"type": "progress", "current": 0, "total": 3, "label": "analyzing"})
# Inject routing intelligence from historical data
routing = _build_routing_table()
routing_context = ""
if routing.get("stage_perf"):
routing_context = "\nHISTORICAL DATA (from past successful runs):\n"
for ct, data in list(routing["stage_perf"].items())[:5]:
routing_context += f"- For '{ct}' content, sequence {data['stages']} was used {data['runs']} times\n"
routing_context += "Use this as guidance but adapt to the specific content.\n"
plan_prompt = f"""You are a refinement strategist. Analyze this content and determine the optimal sequence of refinement stages to improve it.
CONTENT TO REFINE:
{prompt[:8000]}
{routing_context}
AVAILABLE REFINEMENT STAGES (pick 3-{max_stages} in the best order):
- VALIDATE: Fact-check claims, verify accuracy, flag unsupported statements
@ -8617,11 +9007,18 @@ Pick ONLY the stages that will meaningfully improve THIS specific content. Not e
}
prev_output = ""
for si, stage in enumerate(stages):
stage_num = si + 1
worker = workers[si % len(workers)]
yield sse({"type": "progress", "current": stage_num, "total": total_stages, "label": stage.lower()})
yield sse({"type": "status", "message": f"Stage {stage_num}/{total_stages}: {stage} ({worker})..."})
remaining = list(stages)
stages_executed = 0
worker_idx = 0
while remaining and stages_executed < max_stages:
stage = remaining.pop(0)
stages_executed += 1
worker = workers[worker_idx % len(workers)]
worker_idx += 1
total_stages = stages_executed + len(remaining) + 1 # +1 for synthesis
yield sse({"type": "progress", "current": stages_executed, "total": total_stages, "label": stage.lower()})
yield sse({"type": "status", "message": f"Stage {stages_executed}: {stage} ({worker})..."})
template = stage_prompts.get(stage, "Analyze and improve this {type}:\n\n{content}")
stage_prompt = template.format(
@ -8637,13 +9034,26 @@ Pick ONLY the stages that will meaningfully improve THIS specific content. Not e
prev_output = result
steps.append({"step": stage, "model": worker, "output": result[:1000]})
# For STRUCTURE and CLARITY stages, the output replaces the working content
if stage in ("STRUCTURE", "CLARITY"):
current_content = result
# Reactive assessment — should we adjust the pipeline?
if remaining and stages_executed < max_stages - 1:
assessment = _assess_stage(orchestrator, stage, result, content_type)
decision, target, reason = _reactive_decide(assessment, remaining, stages_executed, max_stages)
if decision == "insert" and target:
remaining.insert(0, target)
yield sse({"type": "response", "model": "system", "text": f"Reactive: inserting {target} stage — {reason}", "role": "reactive"})
steps.append({"step": "reactive_insert", "target": target, "reason": reason})
elif decision == "skip" and target:
remaining.remove(target)
yield sse({"type": "response", "model": "system", "text": f"Reactive: skipping {target}{reason}", "role": "reactive"})
steps.append({"step": "reactive_skip", "target": target, "reason": reason})
except Exception as e:
yield sse({"type": "response", "model": worker, "text": f"{stage} failed: {e}", "role": "error"})
stage_outputs[stage] = f"Error: {e}"
total_stages = stages_executed + 1
# Stage 3: Final synthesis — combine all insights into the definitive refined version
yield sse({"type": "progress", "current": total_stages, "total": total_stages, "label": "synthesize"})
yield sse({"type": "status", "message": f"Final synthesis with {orchestrator}..."})