Add progress tracking, admin monitor, SSE keepalive, research hardening

Backend:
- Active run tracking with step/substep/error state
- SSE keepalive heartbeat every 15s to prevent nginx timeout
- Run log (last 100 completed runs with timing/errors)
- Research mode: per-question progress, context caps, graceful failures
- Hard cap on research questions (15), answer truncation (8K chars)

Frontend:
- Real progress bar with step segments, elapsed time, event counter
- Progress shimmer animation, step completion indicators
- Improved error display with timing context
- Green completion state with fade

Admin:
- /admin/monitor — live process dashboard
- Stats: active runs, completed, errors, avg duration
- Active run cards with live progress, substep detail, errors
- Recent run history with error traces
- Auto-polls every 3 seconds
- Full retro-brutalist theme matching main UI

Nginx:
- proxy_read_timeout 600s, proxy_send_timeout 600s
- proxy_buffering off for SSE streaming

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
root 2026-03-26 01:22:36 -05:00
parent 8cbc2bec84
commit 242dec7509

View File

@ -645,6 +645,22 @@ HTML = r"""
.status-bar { display: flex; align-items: center; gap: 8px; padding: 10px 14px; background: rgba(0,0,0,0.3); border: 2px solid var(--border); border-radius: 2px; font-size: 11px; color: var(--text2); font-family: 'JetBrains Mono', monospace; text-transform: uppercase; letter-spacing: 0.5px; }
.spinner { width: 14px; height: 14px; border: 2px solid var(--border); border-top-color: var(--accent); border-radius: 50%; animation: spin 0.7s linear infinite; }
@keyframes spin { to { transform: rotate(360deg); } }
.progress-panel { background: rgba(0,0,0,0.3); border: 2px solid var(--border); border-radius: 2px; padding: 14px; margin-bottom: 10px; }
.progress-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 10px; }
.progress-header .prog-mode { font-family: 'JetBrains Mono', monospace; font-size: 10px; text-transform: uppercase; letter-spacing: 1.5px; color: var(--accent); font-weight: 700; }
.progress-header .prog-time { font-family: 'JetBrains Mono', monospace; font-size: 10px; color: var(--text2); letter-spacing: 0.5px; }
.progress-track { height: 6px; background: rgba(0,0,0,0.4); border: 1px solid var(--border); border-radius: 1px; overflow: hidden; margin-bottom: 8px; }
.progress-fill { height: 100%; background: var(--accent); transition: width 0.4s ease; box-shadow: 0 0 10px rgba(226,181,90,0.3); position: relative; }
.progress-fill::after { content: ''; position: absolute; right: 0; top: 0; bottom: 0; width: 20px; background: linear-gradient(90deg, transparent, var(--accent2)); animation: progress-shimmer 1.5s ease-in-out infinite; }
@keyframes progress-shimmer { 0%,100% { opacity: 0.3; } 50% { opacity: 1; } }
.progress-steps { display: flex; gap: 4px; margin-bottom: 8px; }
.progress-step { flex: 1; height: 3px; background: rgba(0,0,0,0.4); border-radius: 1px; transition: background 0.3s; }
.progress-step.done { background: var(--accent); }
.progress-step.active { background: var(--accent); animation: step-pulse 1s ease-in-out infinite; }
@keyframes step-pulse { 0%,100% { opacity: 1; } 50% { opacity: 0.5; } }
.progress-detail { font-family: 'JetBrains Mono', monospace; font-size: 10px; color: var(--text2); display: flex; justify-content: space-between; }
.progress-detail .prog-substep { max-width: 70%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.progress-detail .prog-stats { color: var(--text2); opacity: 0.6; }
.sample-prompts { display: flex; flex-wrap: wrap; gap: 6px; margin: 8px 0; }
.sample-chip { background: rgba(0,0,0,0.3); border: 1px solid var(--border); border-radius: 2px; padding: 6px 12px; font-size: 11px; color: var(--text2); cursor: pointer; transition: all 0.15s; line-height: 1.4; max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.sample-chip:hover { border-color: var(--accent); color: var(--accent); background: var(--glow); }
@ -1257,15 +1273,83 @@ function buildConfig() {
return c;
}
// Per-run progress state, reset at the start of each runTeam() invocation.
let _runStartTime = 0;
let _runTimer = null;
let _runEventCount = 0;
let _runResponseCount = 0;
/** Render a millisecond duration as a compact "Ns" or "Mm Ns" label. */
function formatElapsed(ms) {
  const totalSec = Math.floor(ms / 1000);
  if (totalSec >= 60) {
    const mins = Math.floor(totalSec / 60);
    return `${mins}m ${totalSec % 60}s`;
  }
  return `${totalSec}s`;
}
/**
 * Timer tick (runs every second while a run is active): refresh the elapsed
 * time label and the event/response counters in the progress panel.
 * Both elements may be absent (panel not rendered yet), so each is guarded.
 */
function updateProgressTime() {
  const timeEl = document.getElementById('prog-time');
  if (timeEl && _runStartTime) {
    timeEl.textContent = formatElapsed(Date.now() - _runStartTime);
  }
  const eventsEl = document.getElementById('prog-events');
  if (eventsEl) {
    eventsEl.textContent = `${_runEventCount} events / ${_runResponseCount} responses`;
  }
}
async function runTeam() {
const config = buildConfig();
if (!config) return;
const btn = document.getElementById('run-btn');
btn.disabled = true; btn.textContent = 'Running...';
const output = document.getElementById('output');
output.innerHTML = '<div class="status-bar"><div class="spinner"></div><span>Starting team...</span></div>';
_runStartTime = Date.now();
_runEventCount = 0;
_runResponseCount = 0;
const progEl = document.createElement('div');
progEl.className = 'progress-panel';
progEl.id = 'run-progress';
progEl.textContent = '';
const header = document.createElement('div');
header.className = 'progress-header';
const modeLabel = document.createElement('span');
modeLabel.className = 'prog-mode';
modeLabel.textContent = currentMode;
const timeLabel = document.createElement('span');
timeLabel.className = 'prog-time';
timeLabel.id = 'prog-time';
timeLabel.textContent = '0s';
header.appendChild(modeLabel);
header.appendChild(timeLabel);
progEl.appendChild(header);
const track = document.createElement('div');
track.className = 'progress-track';
const fill = document.createElement('div');
fill.className = 'progress-fill';
fill.id = 'prog-fill';
fill.style.width = '2%';
track.appendChild(fill);
progEl.appendChild(track);
const stepsDiv = document.createElement('div');
stepsDiv.className = 'progress-steps';
stepsDiv.id = 'prog-steps';
progEl.appendChild(stepsDiv);
const detail = document.createElement('div');
detail.className = 'progress-detail';
const substep = document.createElement('span');
substep.className = 'prog-substep';
substep.id = 'prog-substep';
substep.textContent = 'Initializing...';
const stats = document.createElement('span');
stats.className = 'prog-stats';
stats.id = 'prog-events';
stats.textContent = '0 events';
detail.appendChild(substep);
detail.appendChild(stats);
progEl.appendChild(detail);
output.textContent = '';
output.appendChild(progEl);
_runTimer = setInterval(updateProgressTime, 1000);
try {
const resp = await fetch('/api/run', { method: 'POST', headers: {'Content-Type': 'application/json'}, body: JSON.stringify(config) });
if (!resp.ok) {
const errData = await resp.json().catch(function() { return {error: 'HTTP ' + resp.status}; });
throw new Error(errData.error || 'HTTP ' + resp.status);
}
const reader = resp.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
@ -1276,26 +1360,80 @@ async function runTeam() {
const lines = buffer.split('\n');
buffer = lines.pop();
for (const line of lines) {
if (line.startsWith('data: ')) { try { handleEvent(JSON.parse(line.slice(6))); } catch(e) {} }
if (line.startsWith('data: ')) { try { _runEventCount++; handleEvent(JSON.parse(line.slice(6))); } catch(e) {} }
}
}
} catch(e) {
output.innerHTML = `<div class="status-bar" style="color:var(--red)">Error: ${e.message}</div>`;
const errDiv = document.createElement('div');
errDiv.className = 'status-bar';
errDiv.style.color = 'var(--red)';
errDiv.style.borderColor = 'var(--red)';
errDiv.textContent = 'Error: ' + e.message + ' (after ' + formatElapsed(Date.now() - _runStartTime) + ')';
output.appendChild(errDiv);
}
clearInterval(_runTimer);
const prog = document.getElementById('run-progress');
if (prog) {
const fillEl = document.getElementById('prog-fill');
if (fillEl) { fillEl.style.width = '100%'; fillEl.style.boxShadow = '0 0 16px rgba(74,222,128,0.4)'; fillEl.style.background = 'var(--green)'; }
const sub = document.getElementById('prog-substep');
if (sub) sub.textContent = 'Complete — ' + formatElapsed(Date.now() - _runStartTime);
const allSteps = prog.querySelectorAll('.progress-step');
allSteps.forEach(function(s) { s.className = 'progress-step done'; });
setTimeout(function() { if (prog.parentNode) { prog.style.opacity = '0.4'; prog.style.transition = 'opacity 2s'; } }, 3000);
}
btn.disabled = false; btn.textContent = 'Run Team';
}
function handleEvent(evt) {
const output = document.getElementById('output');
if (evt.type === 'clear') { output.innerHTML = ''; return; }
if (evt.type === 'clear') {
const prog = document.getElementById('run-progress');
output.textContent = '';
if (prog) output.appendChild(prog);
return;
}
if (evt.type === 'progress') {
const fill = document.getElementById('prog-fill');
const sub = document.getElementById('prog-substep');
const stepsDiv = document.getElementById('prog-steps');
if (fill && evt.percent != null) fill.style.width = Math.max(2, Math.min(98, evt.percent)) + '%';
if (sub && evt.substep) sub.textContent = evt.substep;
if (stepsDiv && evt.total_steps) {
while (stepsDiv.children.length < evt.total_steps) {
const s = document.createElement('div');
s.className = 'progress-step';
stepsDiv.appendChild(s);
}
for (let i = 0; i < stepsDiv.children.length; i++) {
if (i < evt.step - 1) stepsDiv.children[i].className = 'progress-step done';
else if (i === evt.step - 1) stepsDiv.children[i].className = 'progress-step active';
else stepsDiv.children[i].className = 'progress-step';
}
}
return;
}
if (evt.type === 'status') {
const sub = document.getElementById('prog-substep');
if (sub) { sub.textContent = evt.message; return; }
let bar = output.querySelector('.status-bar');
if (bar) bar.querySelector('span').textContent = evt.message;
else output.innerHTML += `<div class="status-bar"><div class="spinner"></div><span>${evt.message}</span></div>`;
else {
const newBar = document.createElement('div');
newBar.className = 'status-bar';
const sp = document.createElement('div');
sp.className = 'spinner';
const span = document.createElement('span');
span.textContent = evt.message;
newBar.appendChild(sp);
newBar.appendChild(span);
output.appendChild(newBar);
}
return;
}
if (evt.type === 'done') { const bar = output.querySelector('.status-bar'); if (bar) bar.remove(); return; }
if (evt.type === 'response') {
_runResponseCount++;
const bar = output.querySelector('.status-bar'); if (bar) bar.remove();
const mi = availableModels.findIndex(m => m.name === evt.model);
const color = COLORS[(mi >= 0 ? mi : 0) % COLORS.length];
@ -2870,6 +3008,178 @@ def admin_ollama_models():
return jsonify({"models": [], "error": str(e)})
# ─── ADMIN MONITOR ─────────────────────────────────────────────
@app.route("/admin/monitor")
@admin_required
def monitor_page():
    # Serve the static monitor dashboard shell; all live data is fetched
    # client-side from /api/admin/monitor (polled every 3s by the page's JS).
    return MONITOR_HTML
@app.route("/api/admin/monitor")
@admin_required
def monitor_data():
    """JSON snapshot for the monitor dashboard.

    Returns {"active": [...], "recent": [...], "timestamp": float} where
    `active` summarizes each in-flight run and `recent` is the last 20
    completed runs (newest first) from the bounded run log.
    """
    active = []
    # Snapshot the items: the /api/run streaming generator pops finished runs
    # out of _active_runs from other request threads, and iterating the live
    # dict while it is mutated raises "dictionary changed size during
    # iteration".
    for rid, r in list(_active_runs.items()):
        active.append({
            "run_id": rid, "mode": r["mode"], "user": r["user"],
            "prompt": r["prompt"], "elapsed": round(time.time() - r["started"], 1),
            "step": r["step"], "total_steps": r["total_steps"],
            "substep": r["substep"], "events": r["events"],
            "errors": len(r["errors"]), "responses_size": r["responses_size"],
            "error_details": r["errors"][-3:]  # last 3 errors
        })
    recent = list(reversed(_run_log[-20:]))
    return jsonify({"active": active, "recent": recent, "timestamp": time.time()})
# Self-contained page served at /admin/monitor (see monitor_page).
# Contains its own CSS (retro-brutalist theme matching the main UI) and JS:
# the inline script draws an animated dot-grid background, then polls
# /api/admin/monitor every 3 seconds and re-renders the stats boxes plus the
# active/recent run cards. All dynamic text is inserted via textContent, so
# prompts and error strings from the API are not interpreted as HTML.
MONITOR_HTML = r"""<!DOCTYPE html>
<html lang="en"><head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1.0">
<title>LLM Team Monitor</title>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600;700&family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
:root{--bg:#08090c;--surface:rgba(14,16,22,0.85);--border:#2a2d35;--text:#e8e6e3;--text2:#7a7872;--accent:#e2b55a;--green:#4ade80;--red:#e05252;--blue:#5b9cf5}
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:'Inter',sans-serif;background:var(--bg);color:var(--text);min-height:100vh;padding:20px 28px}
canvas#bg-grid{position:fixed;inset:0;z-index:0;pointer-events:none}
.scanlines{position:fixed;inset:0;z-index:1;pointer-events:none;background:repeating-linear-gradient(0deg,transparent,transparent 2px,rgba(0,0,0,0.02) 2px,rgba(0,0,0,0.02) 4px)}
.wrap{position:relative;z-index:10;max-width:1200px;margin:0 auto}
header{display:flex;align-items:center;gap:14px;padding-bottom:18px;border-bottom:2px solid var(--border);margin-bottom:24px}
h1{font-family:'JetBrains Mono',monospace;font-size:18px;font-weight:700}
h1 span{color:var(--accent)}
.back{color:var(--text2);text-decoration:none;font-size:10px;font-family:'JetBrains Mono',monospace;text-transform:uppercase;letter-spacing:1px;border:2px solid var(--border);padding:5px 12px;border-radius:2px;margin-left:auto}
.back:hover{border-color:var(--accent);color:var(--accent)}
.live-dot{width:8px;height:8px;border-radius:50%;background:var(--green);box-shadow:0 0 8px var(--green);animation:pulse-dot 2s ease-in-out infinite}
@keyframes pulse-dot{0%,100%{opacity:1}50%{opacity:0.5}}
.section{margin-bottom:28px}
.section-title{font-family:'JetBrains Mono',monospace;font-size:10px;text-transform:uppercase;letter-spacing:2px;color:var(--accent);margin-bottom:12px;font-weight:700}
.card{background:var(--surface);border:2px solid var(--border);border-radius:2px;padding:16px;margin-bottom:8px;backdrop-filter:blur(16px)}
.card.active{border-color:var(--accent);box-shadow:0 0 20px rgba(226,181,90,0.05)}
.card.error{border-color:var(--red)}
.card-row{display:flex;align-items:center;gap:12px;margin-bottom:6px;flex-wrap:wrap}
.tag{font-family:'JetBrains Mono',monospace;font-size:9px;text-transform:uppercase;letter-spacing:1px;padding:3px 8px;border:1px solid;border-radius:1px;font-weight:600}
.tag-mode{color:var(--accent);border-color:rgba(226,181,90,0.3)}
.tag-user{color:var(--blue);border-color:rgba(91,156,245,0.3)}
.tag-time{color:var(--text2);border-color:var(--border)}
.tag-err{color:var(--red);border-color:rgba(224,82,82,0.3)}
.tag-ok{color:var(--green);border-color:rgba(74,222,128,0.3)}
.prompt-text{font-size:12px;color:var(--text2);margin:4px 0 8px;font-style:italic;max-width:600px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.mini-progress{height:4px;background:rgba(0,0,0,0.4);border-radius:1px;overflow:hidden;margin:6px 0}
.mini-fill{height:100%;background:var(--accent);transition:width 0.5s}
.substep{font-family:'JetBrains Mono',monospace;font-size:10px;color:var(--text2)}
.error-line{font-family:'JetBrains Mono',monospace;font-size:10px;color:var(--red);border-left:2px solid var(--red);padding-left:8px;margin:4px 0}
.stats-grid{display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin-bottom:24px}
.stat-box{background:var(--surface);border:2px solid var(--border);border-radius:2px;padding:14px;backdrop-filter:blur(16px);text-align:center}
.stat-val{font-family:'JetBrains Mono',monospace;font-size:22px;font-weight:700;color:var(--accent)}
.stat-label{font-family:'JetBrains Mono',monospace;font-size:9px;text-transform:uppercase;letter-spacing:1.5px;color:var(--text2);margin-top:4px}
.empty{font-family:'JetBrains Mono',monospace;font-size:11px;color:var(--text2);padding:20px;text-align:center;opacity:0.5}
@media(max-width:768px){.stats-grid{grid-template-columns:repeat(2,1fr)}.card-row{gap:6px}}
</style>
</head><body>
<canvas id="bg-grid"></canvas>
<div class="scanlines"></div>
<div class="wrap">
<header>
<div class="live-dot"></div>
<h1><span>Monitor</span> // Process View</h1>
<a class="back" href="/"> Back to Team</a>
<a class="back" href="/admin">Admin</a>
</header>
<div class="stats-grid">
<div class="stat-box"><div class="stat-val" id="s-active">0</div><div class="stat-label">Active Runs</div></div>
<div class="stat-box"><div class="stat-val" id="s-total">0</div><div class="stat-label">Completed</div></div>
<div class="stat-box"><div class="stat-val" id="s-errors">0</div><div class="stat-label">Errors</div></div>
<div class="stat-box"><div class="stat-val" id="s-avgtime"></div><div class="stat-label">Avg Duration</div></div>
</div>
<div class="section">
<div class="section-title">Active Runs</div>
<div id="active-runs"><div class="empty">No active runs</div></div>
</div>
<div class="section">
<div class="section-title">Recent Runs (last 20)</div>
<div id="recent-runs"><div class="empty">No recent runs</div></div>
</div>
</div>
<script>
!function(){const c=document.getElementById('bg-grid');if(!c)return;const x=c.getContext('2d');function resize(){c.width=window.innerWidth;c.height=window.innerHeight}resize();window.addEventListener('resize',resize);let t=0;function draw(){x.clearRect(0,0,c.width,c.height);const s=50,ox=(t*0.2)%s,oy=(t*0.1)%s;x.fillStyle='rgba(226,181,90,0.025)';for(let gx=-s+ox;gx<c.width+s;gx+=s){for(let gy=-s+oy;gy<c.height+s;gy+=s){x.beginPath();x.arc(gx,gy,0.7,0,Math.PI*2);x.fill()}}t++;requestAnimationFrame(draw)}draw()}();
function fmt(s){if(!s&&s!==0)return'';if(s<60)return Math.round(s)+'s';return Math.floor(s/60)+'m '+Math.round(s%60)+'s'}
function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML;}
function renderActive(runs){
const el=document.getElementById('active-runs');
if(!runs.length){el.textContent='';const e=document.createElement('div');e.className='empty';e.textContent='No active runs';el.appendChild(e);return}
el.textContent='';
runs.forEach(function(r){
const c=document.createElement('div');
c.className='card active';
const row=document.createElement('div');row.className='card-row';
row.appendChild(tag(r.mode,'tag-mode'));
row.appendChild(tag(r.user,'tag-user'));
row.appendChild(tag(fmt(r.elapsed),'tag-time'));
row.appendChild(tag(r.events+' events','tag-time'));
if(r.errors>0)row.appendChild(tag(r.errors+' errors','tag-err'));
c.appendChild(row);
const p=document.createElement('div');p.className='prompt-text';p.textContent=r.prompt;c.appendChild(p);
if(r.total_steps>0){
const mp=document.createElement('div');mp.className='mini-progress';
const mf=document.createElement('div');mf.className='mini-fill';
const pct=r.total_steps>0?Math.round((r.step/r.total_steps)*100):0;
mf.style.width=Math.max(5,pct)+'%';mp.appendChild(mf);c.appendChild(mp);
}
if(r.substep){const s=document.createElement('div');s.className='substep';s.textContent=r.substep;c.appendChild(s)}
if(r.error_details){r.error_details.forEach(function(e){
const el2=document.createElement('div');el2.className='error-line';el2.textContent=e.model+': '+e.error;c.appendChild(el2);
})}
el.appendChild(c);
});
}
function renderRecent(runs){
const el=document.getElementById('recent-runs');
if(!runs.length){el.textContent='';const e=document.createElement('div');e.className='empty';e.textContent='No recent runs';el.appendChild(e);return}
el.textContent='';
runs.forEach(function(r){
const c=document.createElement('div');
c.className='card'+(r.errors&&r.errors.length?' error':'');
const row=document.createElement('div');row.className='card-row';
row.appendChild(tag(r.mode,'tag-mode'));
row.appendChild(tag(r.user,'tag-user'));
row.appendChild(tag(fmt(r.duration),'tag-time'));
row.appendChild(tag((r.response_count||0)+' responses','tag-time'));
if(r.errors&&r.errors.length)row.appendChild(tag(r.errors.length+' errors','tag-err'));
else row.appendChild(tag('ok','tag-ok'));
c.appendChild(row);
const p=document.createElement('div');p.className='prompt-text';p.textContent=r.prompt;c.appendChild(p);
if(r.errors&&r.errors.length){r.errors.slice(-2).forEach(function(e){
const el2=document.createElement('div');el2.className='error-line';el2.textContent=(e.model||'?')+': '+(e.error||'unknown');c.appendChild(el2);
})}
el.appendChild(c);
});
}
function tag(text,cls){const t=document.createElement('span');t.className='tag '+cls;t.textContent=text;return t}
async function poll(){
try{
const r=await fetch('/api/admin/monitor');
const d=await r.json();
document.getElementById('s-active').textContent=d.active.length;
document.getElementById('s-total').textContent=d.recent.length;
const errs=d.recent.reduce(function(a,r){return a+((r.errors&&r.errors.length)||0)},0);
document.getElementById('s-errors').textContent=errs;
const durations=d.recent.filter(function(r){return r.duration}).map(function(r){return r.duration});
document.getElementById('s-avgtime').textContent=durations.length?fmt(durations.reduce(function(a,b){return a+b},0)/durations.length):'';
renderActive(d.active);
renderRecent(d.recent);
}catch(e){console.error('Monitor poll error:',e)}
}
poll();
setInterval(poll,3000);
</script>
</body></html>"""
# ─── HISTORY ROUTES ────────────────────────────────────────────
@app.route("/api/runs")
@ -3297,6 +3607,19 @@ def lab_stream(eid):
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
# ─── ACTIVE RUN TRACKING ──────────────────────────────────────
import uuid as _uuid
_active_runs = {} # run_id -> {mode, user, prompt, started, step, total_steps, substep, events, errors}
_run_log = [] # recent completed runs with timing/error info (last 100)
def _log_run(run_info):
"""Archive a completed run to the log."""
_run_log.append(run_info)
if len(_run_log) > 100:
_run_log.pop(0)
# ─── TEAM ROUTES ──────────────────────────────────────────────
@app.route("/api/run", methods=["POST"])
@ -3328,21 +3651,60 @@ def run_team():
"research": run_research, "eval": run_eval, "extract": run_extract,
}
run_id = str(_uuid.uuid4())[:8]
username = session.get("username", "unknown")
_active_runs[run_id] = {
"mode": mode, "user": username, "prompt": prompt[:100],
"started": time.time(), "step": 0, "total_steps": 0,
"substep": "", "events": 0, "errors": [],
"responses_size": 0
}
def generate():
collected = []
runner = RUNNERS.get(mode)
if runner:
for event_str in runner(config):
yield event_str
try:
data = json.loads(event_str.replace("data: ", "", 1).strip())
if data.get("type") == "response":
collected.append({"model": data.get("model", ""), "text": data.get("text", ""), "role": data.get("role", "")})
except Exception:
pass
else:
yield sse({"type": "response", "model": "system", "text": f"Unknown mode: {mode}", "role": "error"})
yield sse({"type": "done"})
run = _active_runs[run_id]
last_heartbeat = time.time()
try:
runner = RUNNERS.get(mode)
if runner:
for event_str in runner(config):
yield event_str
run["events"] += 1
try:
data = json.loads(event_str.replace("data: ", "", 1).strip())
evt_type = data.get("type")
if evt_type == "response":
text = data.get("text", "")
run["responses_size"] += len(text)
collected.append({"model": data.get("model", ""), "text": text, "role": data.get("role", "")})
if data.get("role") == "error":
run["errors"].append({"model": data.get("model"), "error": text[:200], "time": time.time()})
elif evt_type == "progress":
run["step"] = data.get("step", run["step"])
run["total_steps"] = data.get("total_steps", run["total_steps"])
run["substep"] = data.get("substep", "")
elif evt_type == "status":
run["substep"] = data.get("message", "")
except Exception:
pass
# SSE keepalive — prevent nginx/browser timeout during gaps
now = time.time()
if now - last_heartbeat > 15:
yield ": keepalive\n\n"
last_heartbeat = now
else:
yield sse({"type": "response", "model": "system", "text": f"Unknown mode: {mode}", "role": "error"})
yield sse({"type": "done"})
except Exception as e:
run["errors"].append({"model": "system", "error": str(e)[:500], "time": time.time()})
yield sse({"type": "response", "model": "system", "text": f"Pipeline error: {e}", "role": "error"})
yield sse({"type": "done"})
finally:
run["finished"] = time.time()
run["duration"] = round(run["finished"] - run["started"], 1)
run["response_count"] = len(collected)
_log_run(dict(run, run_id=run_id))
_active_runs.pop(run_id, None)
if collected:
save_run(mode, config.get("prompt", ""), config, collected)
@ -3355,12 +3717,17 @@ def run_team():
def run_brainstorm(config):
models, prompt = config.get("models", []), config["prompt"]
synthesizer = config.get("synthesizer", models[0] if models else "qwen2.5")
total = 2 if len(models) > 1 else 1
yield sse({"type": "clear"})
yield sse({"type": "progress", "step": 1, "total_steps": total, "substep": f"Querying {len(models)} models in parallel...", "percent": 10})
yield sse({"type": "status", "message": f"Querying {len(models)} models..."})
responses = parallel_query(models, prompt)
for m, r in responses.items():
for i, (m, r) in enumerate(responses.items()):
pct = 10 + int(((i + 1) / len(responses)) * 50)
yield sse({"type": "progress", "step": 1, "total_steps": total, "substep": f"{i+1}/{len(responses)} models responded", "percent": pct})
yield sse({"type": "response", "model": m, "text": r, "role": "respondent"})
if len(responses) > 1:
yield sse({"type": "progress", "step": 2, "total_steps": total, "substep": f"Synthesizing with {synthesizer}...", "percent": 70})
yield sse({"type": "status", "message": f"Synthesizing with {synthesizer}..."})
parts = [("QUESTION:", prompt, 1), ("INSTRUCTION:", "Synthesize the best answer. Be concise.", 1)]
for m, r in responses.items():
@ -3370,6 +3737,7 @@ def run_brainstorm(config):
yield sse({"type": "response", "model": synthesizer, "text": safe_query(synthesizer, sp), "role": "synthesis"})
except Exception as e:
yield sse({"type": "response", "model": synthesizer, "text": str(e), "role": "error"})
yield sse({"type": "progress", "step": total, "total_steps": total, "substep": "Complete", "percent": 100})
def run_pipeline(config):
@ -3986,13 +4354,15 @@ def run_research(config):
models = config.get("models", [])
checker = config.get("checker", models[0] if models else scout)
synth = config.get("synthesizer", models[0] if models else scout)
num_q = config.get("num_questions", 5)
num_q = min(config.get("num_questions", 5), 15) # hard cap at 15
yield sse({"type": "clear"})
total_steps = 4
steps = []
all_models = [scout, checker, synth] + models
# Step 1: Scout generates research questions
yield sse({"type": "status", "message": f"Step 1/4: {scout} generating {num_q} research questions..."})
yield sse({"type": "progress", "step": 1, "total_steps": total_steps, "substep": f"{scout} generating {num_q} research questions...", "percent": 5})
yield sse({"type": "status", "message": f"Step 1/{total_steps}: {scout} generating {num_q} research questions..."})
try:
q_prompt = (
f"You are a research scout. Given the topic below, generate exactly {num_q} specific, "
@ -4013,9 +4383,14 @@ def run_research(config):
yield sse({"type": "response", "model": "system", "text": "Failed to parse research questions.", "role": "error"})
return
yield sse({"type": "progress", "step": 1, "total_steps": total_steps, "substep": f"Parsed {len(questions)} questions", "percent": 15})
# Step 2: Parallel research — distribute questions across models
yield sse({"type": "status", "message": f"Step 2/4: {len(models)} models researching {len(questions)} questions..."})
yield sse({"type": "progress", "step": 2, "total_steps": total_steps, "substep": f"0/{len(questions)} questions researched...", "percent": 18})
yield sse({"type": "status", "message": f"Step 2/{total_steps}: {len(models)} models researching {len(questions)} questions..."})
research_results = {}
completed_q = 0
failed_q = 0
with ThreadPoolExecutor(max_workers=max(len(models), 1)) as pool:
futures = {}
for i, q in enumerate(questions):
@ -4026,18 +4401,33 @@ def run_research(config):
m, q = futures[future]
try:
answer = future.result()
# Cap individual research answers to prevent context explosion
if len(answer) > 8000:
answer = answer[:7500] + "\n\n[... response truncated for pipeline stability ...]"
research_results[q] = {"model": m, "answer": answer}
completed_q += 1
pct = 18 + int((completed_q / len(questions)) * 42)
yield sse({"type": "progress", "step": 2, "total_steps": total_steps, "substep": f"{completed_q}/{len(questions)} questions researched", "percent": pct})
yield sse({"type": "response", "model": m, "text": f"Q: {q}\n\n{answer}", "role": "researcher"})
except Exception as e:
failed_q += 1
completed_q += 1
pct = 18 + int((completed_q / len(questions)) * 42)
yield sse({"type": "progress", "step": 2, "total_steps": total_steps, "substep": f"{completed_q}/{len(questions)} ({failed_q} failed)", "percent": pct})
yield sse({"type": "response", "model": m, "text": f"Q: {q}\n\nError: {e}", "role": "error"})
research_results[q] = {"model": m, "answer": f"Error: {e}"}
steps.append({"step": "research", "results": {q: r["answer"][:500] for q, r in research_results.items()}})
# Step 3: Fact-check
yield sse({"type": "status", "message": f"Step 3/4: {checker} fact-checking all findings..."})
# Step 3: Fact-check — cap context to prevent OOM
yield sse({"type": "progress", "step": 3, "total_steps": total_steps, "substep": f"{checker} fact-checking...", "percent": 62})
yield sse({"type": "status", "message": f"Step 3/{total_steps}: {checker} fact-checking all findings..."})
check_prompt = f"Topic: {prompt}\n\nResearch findings to fact-check:\n\n"
# Smart truncation: fit within context limits
per_answer_cap = min(300, 3000 // max(len(research_results), 1))
for q, r in research_results.items():
check_prompt += f"Q: {q}\nA: {r['answer'][:300]}\n\n"
if r["answer"].startswith("Error:"):
continue
check_prompt += f"Q: {q}\nA: {r['answer'][:per_answer_cap]}\n\n"
check_prompt += (
"For each finding, mark as:\n"
" VERIFIED — likely accurate\n"
@ -4053,11 +4443,16 @@ def run_research(config):
check_result = f"Error: {e}"
yield sse({"type": "response", "model": checker, "text": str(e), "role": "error"})
# Step 4: Synthesize into brief
yield sse({"type": "status", "message": f"Step 4/4: {synth} synthesizing research brief..."})
# Step 4: Synthesize into brief — cap context
yield sse({"type": "progress", "step": 4, "total_steps": total_steps, "substep": f"{synth} synthesizing brief...", "percent": 80})
yield sse({"type": "status", "message": f"Step 4/{total_steps}: {synth} synthesizing research brief..."})
synth_prompt = f"Topic: {prompt}\n\nResearch findings:\n\n"
per_synth_cap = min(400, 4000 // max(len(research_results), 1))
for q, r in research_results.items():
synth_prompt += f"Q: {q}\nA: {r['answer'][:400]}\n\n"
if r["answer"].startswith("Error:"):
synth_prompt += f"Q: {q}\nA: [research failed]\n\n"
else:
synth_prompt += f"Q: {q}\nA: {r['answer'][:per_synth_cap]}\n\n"
synth_prompt += f"\nFact-check notes:\n{check_result[:500]}\n\n"
synth_prompt += (
"Synthesize ALL findings into a structured research brief with these sections:\n"
@ -4076,6 +4471,8 @@ def run_research(config):
brief = f"Error: {e}"
yield sse({"type": "response", "model": synth, "text": str(e), "role": "error"})
yield sse({"type": "progress", "step": 4, "total_steps": total_steps, "substep": "Research complete", "percent": 100})
# Save pipeline run
_save_pipeline("research", prompt, steps, {"brief": brief, "questions": questions, "fact_check": check_result[:1000]}, all_models, start)