def save_run(mode, prompt, config_data, responses):
    """Persist a completed run and start background auto-scoring for it.

    Args:
        mode: Run mode label, stored in the ``mode`` column.
        prompt: Prompt text, stored in the ``prompt`` column.
        config_data: JSON-serialisable run configuration.
        responses: List of response dicts; the ``model``, ``role`` and
            ``text`` keys are read here and by the scorer.

    Returns:
        The id of the inserted ``team_runs`` row, or ``None`` when the insert
        (or its commit) failed. Database errors are logged, not raised.
    """
    # De-duplicated names of the models that produced at least one response.
    models = list({r.get("model", "") for r in responses if r.get("model")})
    run_id = None
    try:
        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO team_runs (mode, prompt, config, responses, models_used) VALUES (%s, %s, %s, %s, %s) RETURNING id",
                    (mode, prompt, json.dumps(config_data), json.dumps(responses), models)
                )
                row = cur.fetchone()
            conn.commit()
            # Publish the id only once the commit has succeeded, so a failed
            # commit never hands out (or scores) a row that does not exist.
            run_id = row[0] if row else None
    except Exception as e:
        # Best-effort persistence: a DB failure must not break the caller.
        print(f"[DB] save_run error: {e}")
    if run_id is not None and responses:
        # Scoring calls a judge model, so it runs off the request path.
        threading.Thread(
            target=_auto_score_run,
            args=(run_id, mode, prompt, responses),
            daemon=True,
        ).start()
    return run_id


# ─── AUTO-SCORING ENGINE ─────────────────────────────────────
_SCORE_MODEL = "qwen2.5:latest"


def _parse_judge_score(judgment):
    """Extract ``(score, reason)`` from the judge model's raw reply.

    The reply is expected to contain a JSON object such as
    ``{"score": 8, "reason": "..."}``. If no usable JSON score is found, the
    first standalone integer 1-10 in the text is used instead.

    Returns:
        ``(score, reason)`` where ``score`` is a float in ``[1, 10]`` and
        ``reason`` is the JSON ``reason`` string (``""`` when absent), or
        ``(None, "")`` when no valid score could be extracted.
    """
    if not isinstance(judgment, str):
        return None, ""

    score = None
    reason = ""

    # Take the outermost {...} span so surrounding chatter is ignored.
    start = judgment.find("{")
    end = judgment.rfind("}") + 1
    if 0 <= start < end:
        try:
            parsed = json.loads(judgment[start:end])
        except ValueError:
            parsed = None
        if isinstance(parsed, dict):
            raw_reason = parsed.get("reason")
            if isinstance(raw_reason, str):
                reason = raw_reason.strip()
            try:
                # A missing "score" becomes 0, which the range check rejects.
                score = float(parsed.get("score", 0))
            except (TypeError, ValueError, OverflowError):
                score = None

    if score is None:
        # Fallback for non-JSON replies such as "I'd rate this 7 out of 10".
        m = re.search(r'\b([1-9]|10)\b', judgment)
        score = float(m.group(1)) if m else None

    # Written as a chained comparison so NaN (which json.loads accepts) is
    # rejected too: every comparison against NaN is False.
    if score is None or not (1 <= score <= 10):
        return None, ""
    return score, reason


def _auto_score_run(run_id, mode, prompt, responses):
    """Background: auto-score a completed run via judge model.

    Picks the longest non-error response as representative, asks
    ``_SCORE_MODEL`` to rate it 1-10 and stores the result on the run row.
    Rows whose ``score_method`` is set to anything other than ``'auto'`` are
    left untouched by the UPDATE. All failures are logged, never raised, as
    this runs in a daemon thread.

    Args:
        run_id: id of the ``team_runs`` row to score.
        mode: Run mode label, included in the judge prompt.
        prompt: Original prompt; only the first 500 characters are sent.
        responses: List of response dicts with ``role``/``text``/``model`` keys.
    """
    try:
        # Pick the longest non-error response as representative
        candidates = [r for r in responses if r.get("role") != "error" and r.get("text")]
        if not candidates:
            return
        best = max(candidates, key=lambda r: len(r.get("text", "")))
        text = best["text"][:3000]  # cap what is sent to the judge

        judge_prompt = (
            f"Rate the quality of this AI response on a scale of 1-10.\n"
            f"Consider: relevance to the prompt, completeness, accuracy, clarity, usefulness.\n\n"
            f"PROMPT: {prompt[:500]}\n\n"
            f"MODE: {mode}\n\n"
            f"RESPONSE:\n{text}\n\n"
            f"Return ONLY a JSON object: {{\"score\": N, \"reason\": \"one sentence\"}}"
        )
        # query_model is defined elsewhere in this file; presumably it returns
        # the judge's reply as text (non-string replies are treated as unscored).
        judgment = query_model(_SCORE_MODEL, judge_prompt)

        score, reason = _parse_judge_score(judgment)
        if score is None:
            return

        metadata = {
            "judge": _SCORE_MODEL,
            "judgment": judgment[:500],
            "scored_model": best.get("model", ""),
            # Prefer the judge's own one-sentence reason; fall back to the
            # head of the raw reply when the JSON carried none.
            "reason": reason[:200] if reason else judgment[:200],
        }

        with get_db() as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "UPDATE team_runs SET quality_score = %s, score_method = 'auto', score_metadata = %s WHERE id = %s AND (score_method IS NULL OR score_method = 'auto')",
                    (score, json.dumps(metadata), run_id)
                )
            conn.commit()
        print(f"[SCORE] run {run_id} scored {score}/10 by {_SCORE_MODEL}")
    except Exception as e:
        print(f"[SCORE] auto-score error for run {run_id}: {e}")
const color = COLORS[(mi >= 0 ? mi : 0) % COLORS.length]; const displayName = mi >= 0 ? (availableModels[mi].display_name || evt.model) : evt.model; @@ -3161,7 +3232,7 @@ function handleEvent(evt) { const roleTag = evt.role ? `${evt.role}` : ''; const uid = 'resp-' + Date.now() + '-' + Math.random().toString(36).substr(2,4); const errorLink = isError ? `View error details in monitor →` : ''; - card.innerHTML = `