Compare commits
4 Commits
e5e17a71a7
...
205eff64b4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
205eff64b4 | ||
|
|
34ee12e7ed | ||
|
|
98bda6e337 | ||
|
|
fa6ccff079 |
424
llm_team_ui.py
424
llm_team_ui.py
@ -1842,6 +1842,7 @@ DEFAULT_CONFIG = {
|
|||||||
"providers": {
|
"providers": {
|
||||||
"ollama": {"enabled": True, "base_url": "http://localhost:11434", "timeout": 300},
|
"ollama": {"enabled": True, "base_url": "http://localhost:11434", "timeout": 300},
|
||||||
"openrouter": {"enabled": False, "base_url": "https://openrouter.ai/api/v1", "api_key": "", "timeout": 120},
|
"openrouter": {"enabled": False, "base_url": "https://openrouter.ai/api/v1", "api_key": "", "timeout": 120},
|
||||||
|
"ollama_cloud": {"enabled": False, "base_url": "https://ollama.com", "api_key": "", "timeout": 180},
|
||||||
"openai": {"enabled": False, "base_url": "https://api.openai.com/v1", "api_key": "", "timeout": 120},
|
"openai": {"enabled": False, "base_url": "https://api.openai.com/v1", "api_key": "", "timeout": 120},
|
||||||
"anthropic": {"enabled": False, "base_url": "https://api.anthropic.com/v1", "api_key": "", "timeout": 120},
|
"anthropic": {"enabled": False, "base_url": "https://api.anthropic.com/v1", "api_key": "", "timeout": 120},
|
||||||
},
|
},
|
||||||
@ -1884,7 +1885,7 @@ def get_api_key(provider_name):
|
|||||||
key = prov.get("api_key", "")
|
key = prov.get("api_key", "")
|
||||||
if key:
|
if key:
|
||||||
return key
|
return key
|
||||||
env_map = {"openrouter": "OPENROUTER_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY"}
|
env_map = {"openrouter": "OPENROUTER_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama_cloud": "OLLAMA_CLOUD_API_KEY"}
|
||||||
return os.environ.get(env_map.get(provider_name, ""), "")
|
return os.environ.get(env_map.get(provider_name, ""), "")
|
||||||
|
|
||||||
# PostgreSQL connection string for the knowledge-base database.
# SECURITY: the fallback below embeds a database password in source control.
# Set LLM_TEAM_DB_DSN in the environment to override it; the literal is kept
# only so existing deployments keep working. Rotate that credential and then
# drop the fallback.
DB_DSN = os.environ.get(
    "LLM_TEAM_DB_DSN",
    "dbname=knowledge_base user=kbuser password=IPbLBA0EQI8u4TeM2YZrbm1OAy5nSwqC host=localhost",
)
|
||||||
@ -1966,13 +1967,17 @@ def cache_store(cache_key, prompt, mode, models, run_id, score, responses):
|
|||||||
|
|
||||||
def save_run(mode, prompt, config_data, responses):
|
def save_run(mode, prompt, config_data, responses):
|
||||||
models = list({r.get("model", "") for r in responses if r.get("model")})
|
models = list({r.get("model", "") for r in responses if r.get("model")})
|
||||||
|
# Calculate token usage from actual content
|
||||||
|
input_chars = len(prompt)
|
||||||
|
output_chars = sum(len(r.get("text", "")) for r in responses if r.get("text"))
|
||||||
|
est_tokens = estimate_tokens(prompt) + sum(estimate_tokens(r.get("text", "")) for r in responses if r.get("text"))
|
||||||
run_id = None
|
run_id = None
|
||||||
try:
|
try:
|
||||||
with get_db() as conn:
|
with get_db() as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"INSERT INTO team_runs (mode, prompt, config, responses, models_used) VALUES (%s, %s, %s, %s, %s) RETURNING id",
|
"INSERT INTO team_runs (mode, prompt, config, responses, models_used, est_tokens, input_chars, output_chars) VALUES (%s, %s, %s, %s, %s, %s, %s, %s) RETURNING id",
|
||||||
(mode, prompt, json.dumps(config_data), json.dumps(responses), models)
|
(mode, prompt, json.dumps(config_data), json.dumps(responses), models, est_tokens, input_chars, output_chars)
|
||||||
)
|
)
|
||||||
run_id = cur.fetchone()[0]
|
run_id = cur.fetchone()[0]
|
||||||
conn.commit()
|
conn.commit()
|
||||||
@ -2114,6 +2119,7 @@ HTML = r"""
|
|||||||
.model-card .meta { font-size: 10px; color: var(--text2); font-family: 'JetBrains Mono', monospace; }
|
.model-card .meta { font-size: 10px; color: var(--text2); font-family: 'JetBrains Mono', monospace; }
|
||||||
.prov-badge { font-size: 8px; padding: 2px 6px; border-radius: 1px; font-weight: 700; text-transform: uppercase; letter-spacing: 0.8px; font-family: 'JetBrains Mono', monospace; border: 1px solid; }
|
.prov-badge { font-size: 8px; padding: 2px 6px; border-radius: 1px; font-weight: 700; text-transform: uppercase; letter-spacing: 0.8px; font-family: 'JetBrains Mono', monospace; border: 1px solid; }
|
||||||
.prov-badge.ollama { background: rgba(74,222,128,0.08); color: var(--green); border-color: rgba(74,222,128,0.2); }
|
.prov-badge.ollama { background: rgba(74,222,128,0.08); color: var(--green); border-color: rgba(74,222,128,0.2); }
|
||||||
|
.prov-badge.ollama_cloud { background: rgba(245,245,245,0.08); color: #e6edf3; border-color: rgba(245,245,245,0.2); }
|
||||||
.prov-badge.openrouter { background: rgba(91,156,245,0.08); color: var(--blue); border-color: rgba(91,156,245,0.2); }
|
.prov-badge.openrouter { background: rgba(91,156,245,0.08); color: var(--blue); border-color: rgba(91,156,245,0.2); }
|
||||||
.prov-badge.openai { background: rgba(226,181,90,0.08); color: var(--accent2); border-color: rgba(226,181,90,0.2); }
|
.prov-badge.openai { background: rgba(226,181,90,0.08); color: var(--accent2); border-color: rgba(226,181,90,0.2); }
|
||||||
.prov-badge.anthropic { background: rgba(236,72,153,0.08); color: #ec4899; border-color: rgba(236,72,153,0.2); }
|
.prov-badge.anthropic { background: rgba(236,72,153,0.08); color: #ec4899; border-color: rgba(236,72,153,0.2); }
|
||||||
@ -2488,6 +2494,7 @@ HTML = r"""
|
|||||||
<div class="mode-tab" data-mode="refine" onclick="setMode('refine')" style="border-color:var(--accent);border-width:1px">Auto-Refine<small>AI pipeline</small></div>
|
<div class="mode-tab" data-mode="refine" onclick="setMode('refine')" style="border-color:var(--accent);border-width:1px">Auto-Refine<small>AI pipeline</small></div>
|
||||||
<div class="mode-tab" data-mode="extract" onclick="setMode('extract')" style="border-color:var(--blue);border-width:1px">Knowledge<small>Extract facts</small></div>
|
<div class="mode-tab" data-mode="extract" onclick="setMode('extract')" style="border-color:var(--blue);border-width:1px">Knowledge<small>Extract facts</small></div>
|
||||||
<div class="mode-tab" data-mode="adaptive" onclick="setMode('adaptive')" style="border-color:var(--green);border-width:1px;background:rgba(74,222,128,0.04)">Adaptive<small>Self-eval + RAG</small></div>
|
<div class="mode-tab" data-mode="adaptive" onclick="setMode('adaptive')" style="border-color:var(--green);border-width:1px;background:rgba(74,222,128,0.04)">Adaptive<small>Self-eval + RAG</small></div>
|
||||||
|
<div class="mode-tab" data-mode="deep_analysis" onclick="setMode('deep_analysis')" style="border-color:#d946ef;border-width:2px;background:rgba(217,70,239,0.06)">Deep Analysis<small>Full pipeline</small></div>
|
||||||
</div>
|
</div>
|
||||||
<div class="mode-desc" id="mode-desc">All models answer in parallel, then one synthesizes the best parts into a final answer.</div>
|
<div class="mode-desc" id="mode-desc">All models answer in parallel, then one synthesizes the best parts into a final answer.</div>
|
||||||
|
|
||||||
@ -2656,6 +2663,22 @@ HTML = r"""
|
|||||||
<div class="config-row"><label>Confidence Threshold</label><input type="number" id="adaptive-confidence" value="0.7" min="0.3" max="0.95" step="0.05" style="width:70px;flex:none"></div>
|
<div class="config-row"><label>Confidence Threshold</label><input type="number" id="adaptive-confidence" value="0.7" min="0.3" max="0.95" step="0.05" style="width:70px;flex:none"></div>
|
||||||
<div style="font-size:10px;color:var(--text2);margin-top:6px;line-height:1.5;font-family:'JetBrains Mono',monospace">Models self-evaluate confidence. Below threshold → retrieves context from knowledge base → escalates to next model. Order models from weakest to strongest. Successful responses are stored for future RAG retrieval.</div>
|
<div style="font-size:10px;color:var(--text2);margin-top:6px;line-height:1.5;font-family:'JetBrains Mono',monospace">Models self-evaluate confidence. Below threshold → retrieves context from knowledge base → escalates to next model. Order models from weakest to strongest. Successful responses are stored for future RAG retrieval.</div>
|
||||||
</div>
|
</div>
|
||||||
|
<!-- DEEP ANALYSIS -->
|
||||||
|
<div id="config-deep_analysis" class="config-section" style="display:none">
|
||||||
|
<h2>Cloud Models (select 2+ for best results)</h2>
|
||||||
|
<div class="model-list" id="ml-deep_analysis"></div>
|
||||||
|
<div class="config-row"><label>Final Synthesizer</label><select id="deep_analysis-synthesizer"></select></div>
|
||||||
|
<div style="font-size:10px;color:var(--text2);margin-top:6px;line-height:1.5;font-family:'JetBrains Mono',monospace;border-left:2px solid #d946ef;padding-left:10px">
|
||||||
|
<strong style="color:#d946ef">6-Phase Pipeline:</strong><br>
|
||||||
|
1. Research — all models answer in parallel<br>
|
||||||
|
2. Debate — models challenge each other's findings<br>
|
||||||
|
3. Consensus — merge research + critiques<br>
|
||||||
|
4. Self-Eval — score for accuracy, completeness, nuance<br>
|
||||||
|
5. Final Synthesis — strongest model produces definitive answer<br>
|
||||||
|
6. Knowledge Base — result stored for future RAG retrieval<br><br>
|
||||||
|
Use your strongest cloud models here. Results train the local knowledge base so future adaptive runs benefit.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div><!-- end m-collapse -->
|
</div><!-- end m-collapse -->
|
||||||
<div class="panel">
|
<div class="panel">
|
||||||
@ -3155,7 +3178,7 @@ let availableModels = [];
|
|||||||
let currentMode = 'brainstorm';
|
let currentMode = 'brainstorm';
|
||||||
|
|
||||||
const modelSets = {};
|
const modelSets = {};
|
||||||
const ML_IDS = ['ml-brainstorm','ml-validator','ml-roundrobin','ml-consensus','ml-ladder','ml-tournament','ml-evolution','ml-blindassembly','ml-mesh','ml-hallucination','ml-research','ml-eval','ml-refine'];
|
// Element ids of the per-mode model-list containers (the `ml-<mode>` divs in
// the config sections), including the adaptive and deep_analysis modes.
const ML_IDS = ['ml-brainstorm','ml-validator','ml-roundrobin','ml-consensus','ml-ladder','ml-tournament','ml-evolution','ml-blindassembly','ml-mesh','ml-hallucination','ml-research','ml-eval','ml-refine','ml-adaptive','ml-deep_analysis'];
|
||||||
|
|
||||||
const MODE_DESCS = {
|
const MODE_DESCS = {
|
||||||
brainstorm: 'All models answer in parallel, then one synthesizes the best parts.',
|
brainstorm: 'All models answer in parallel, then one synthesizes the best parts.',
|
||||||
@ -3179,7 +3202,8 @@ const MODE_DESCS = {
|
|||||||
eval: 'AUTONOMOUS: Same prompts sent to all selected models. Judge scores each on accuracy, reasoning, clarity. Produces a ranked leaderboard across multiple rounds.',
|
eval: 'AUTONOMOUS: Same prompts sent to all selected models. Judge scores each on accuracy, reasoning, clarity. Produces a ranked leaderboard across multiple rounds.',
|
||||||
extract: 'AUTONOMOUS: Extracts structured facts, entities, and relationships from text or local docs. Verifier cross-checks claims. Output saved as queryable JSON.',
|
extract: 'AUTONOMOUS: Extracts structured facts, entities, and relationships from text or local docs. Verifier cross-checks claims. Output saved as queryable JSON.',
|
||||||
refine: 'AUTONOMOUS: AI analyzes your content, selects the best refinement stages (critique, expand, structure, validate, etc.), and runs them in the optimal order. Turns a good draft into a polished final version.',
|
refine: 'AUTONOMOUS: AI analyzes your content, selects the best refinement stages (critique, expand, structure, validate, etc.), and runs them in the optimal order. Turns a good draft into a polished final version.',
|
||||||
adaptive: 'ADAPTIVE: Each model self-evaluates its confidence. If below threshold, the pipeline retrieves context from a vectorized knowledge base and escalates to a stronger model. Successful responses are stored for future RAG retrieval. The system gets smarter with every run.'
|
adaptive: 'ADAPTIVE: Each model self-evaluates its confidence. If below threshold, the pipeline retrieves context from a vectorized knowledge base and escalates to a stronger model. Successful responses are stored for future RAG retrieval. The system gets smarter with every run.',
|
||||||
|
deep_analysis: 'DEEP ANALYSIS: 6-phase autonomous pipeline — Research (all models) → Debate (challenge findings) → Consensus (merge perspectives) → Self-Eval (score quality) → Final Synthesis (strongest model) → Knowledge Base (store for future RAG). Designed for cloud models. Results train local models.'
|
||||||
};
|
};
|
||||||
|
|
||||||
const SAMPLE_PROMPTS = {
|
const SAMPLE_PROMPTS = {
|
||||||
@ -3600,6 +3624,19 @@ const SAMPLE_PROMPTS = {
|
|||||||
'Design a privacy-preserving federated learning system for healthcare where patient data never leaves hospital networks but a central model improves from all participants. Address differential privacy, secure aggregation, and regulatory compliance.',
|
'Design a privacy-preserving federated learning system for healthcare where patient data never leaves hospital networks but a central model improves from all participants. Address differential privacy, secure aggregation, and regulatory compliance.',
|
||||||
'Build an autonomous incident response system that correlates alerts from 15 monitoring tools, classifies severity, executes runbooks, and escalates to humans only when confidence is below threshold.',
|
'Build an autonomous incident response system that correlates alerts from 15 monitoring tools, classifies severity, executes runbooks, and escalates to humans only when confidence is below threshold.',
|
||||||
'Design a real-time stream processing platform handling 1M events/sec with exactly-once semantics, schema evolution, time-travel debugging, and automatic partition rebalancing across 100 nodes.'
|
'Design a real-time stream processing platform handling 1M events/sec with exactly-once semantics, schema evolution, time-travel debugging, and automatic partition rebalancing across 100 nodes.'
|
||||||
|
]},
|
||||||
|
deep_analysis: { basic: [
|
||||||
|
'What is the most effective approach to implementing AI in a staffing agency that currently uses spreadsheets and phone calls?',
|
||||||
|
'Compare the costs and benefits of building vs buying an internal data platform for a 200-person company.',
|
||||||
|
'How should a company evaluate whether to adopt a local LLM deployment vs cloud API for sensitive internal data?'
|
||||||
|
], mid: [
|
||||||
|
'Design a hybrid search architecture that combines SQL filtering with vector semantic search for a database of 500K worker profiles. Address recall, latency, and ranking.',
|
||||||
|
'What is the optimal strategy for a staffing company to use AI to predict workforce demand from public building permit data? Cover data sources, models, and integration.',
|
||||||
|
'Design a learning feedback loop where every user interaction with a search system improves future results. Address cold start, data quality, and convergence.'
|
||||||
|
], advanced: [
|
||||||
|
'Design a complete AI-powered staffing platform that anticipates client needs before they call, pre-matches workers to contracts, learns from every placement, and handles the sparse data problem where new clients have only a name and phone number. Address architecture, data pipeline, AI models, and the change management challenge of convincing skeptical staffers.',
|
||||||
|
'Architect a system that ingests real-time public data (building permits, government contracts, economic indicators) to predict regional labor demand 3-6 months ahead, cross-references with an existing workforce database, and automatically generates recruiting strategies for identified gaps.',
|
||||||
|
'Design an AI system that can be trusted by non-technical users who are actively hostile to AI adoption. Cover transparency, explainability, graceful degradation, and the specific UX patterns that build trust over time.'
|
||||||
]}
|
]}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -3734,6 +3771,7 @@ function populateAllSelects() {
|
|||||||
'staircase-challenger','drift-target','drift-analyzer','mesh-synthesizer','halluc-answerer',
|
'staircase-challenger','drift-target','drift-analyzer','mesh-synthesizer','halluc-answerer',
|
||||||
'timeloop-answerer','timeloop-chaos',
|
'timeloop-answerer','timeloop-chaos',
|
||||||
'research-scout','research-checker','research-synth',
|
'research-scout','research-checker','research-synth',
|
||||||
|
'adaptive-synthesizer','deep_analysis-synthesizer',
|
||||||
'eval-judge','extract-model','extract-verifier','refine-orchestrator'];
|
'eval-judge','extract-model','extract-verifier','refine-orchestrator'];
|
||||||
ids.forEach(id => {
|
ids.forEach(id => {
|
||||||
const el = document.getElementById(id);
|
const el = document.getElementById(id);
|
||||||
@ -3817,6 +3855,7 @@ function buildConfig() {
|
|||||||
case 'extract': c.extractor = getVal('extract-model'); c.verifier = getVal('extract-verifier'); c.source = getVal('extract-source'); break;
|
case 'extract': c.extractor = getVal('extract-model'); c.verifier = getVal('extract-verifier'); c.source = getVal('extract-source'); break;
|
||||||
case 'refine': c.orchestrator = getVal('refine-orchestrator'); c.models = getModels('ml-refine'); c.max_stages = getNum('refine-stages'); break;
|
case 'refine': c.orchestrator = getVal('refine-orchestrator'); c.models = getModels('ml-refine'); c.max_stages = getNum('refine-stages'); break;
|
||||||
case 'adaptive': c.models = getModels('ml-adaptive'); c.synthesizer = getVal('adaptive-synthesizer'); c.confidence_threshold = parseFloat(document.getElementById('adaptive-confidence').value) || 0.7; break;
|
case 'adaptive': c.models = getModels('ml-adaptive'); c.synthesizer = getVal('adaptive-synthesizer'); c.confidence_threshold = parseFloat(document.getElementById('adaptive-confidence').value) || 0.7; break;
|
||||||
|
case 'deep_analysis': c.models = getModels('ml-deep_analysis'); c.synthesizer = getVal('deep_analysis-synthesizer'); break;
|
||||||
}
|
}
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
@ -4955,6 +4994,7 @@ ADMIN_HTML = r"""
|
|||||||
<div class="tabs">
|
<div class="tabs">
|
||||||
<div class="tab active" onclick="switchTab('providers')">Providers</div>
|
<div class="tab active" onclick="switchTab('providers')">Providers</div>
|
||||||
<div class="tab" onclick="switchTab('models')">Models</div>
|
<div class="tab" onclick="switchTab('models')">Models</div>
|
||||||
|
<div class="tab" onclick="switchTab('ollama_cloud')">Ollama Cloud</div>
|
||||||
<div class="tab" onclick="switchTab('openrouter')">OpenRouter</div>
|
<div class="tab" onclick="switchTab('openrouter')">OpenRouter</div>
|
||||||
<div class="tab" onclick="switchTab('timeouts')">Timeouts</div>
|
<div class="tab" onclick="switchTab('timeouts')">Timeouts</div>
|
||||||
<div class="tab" onclick="switchTab('security')">Security</div>
|
<div class="tab" onclick="switchTab('security')">Security</div>
|
||||||
@ -4970,6 +5010,15 @@ ADMIN_HTML = r"""
|
|||||||
<div class="row"><label>Timeout (s)</label><input id="ollama-timeout" type="number" value="300" style="width:80px;flex:none" onchange="updateProvider('ollama')">
|
<div class="row"><label>Timeout (s)</label><input id="ollama-timeout" type="number" value="300" style="width:80px;flex:none" onchange="updateProvider('ollama')">
|
||||||
<button class="btn" onclick="testProvider('ollama')">Test</button></div>
|
<button class="btn" onclick="testProvider('ollama')">Test</button></div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="card" id="prov-ollama_cloud">
|
||||||
|
<h3><div class="prov-dot" style="background:var(--accent2)"></div> Ollama Cloud
|
||||||
|
<label class="toggle" style="margin-left:auto"><input type="checkbox" id="ollama_cloud-enabled" onchange="updateProvider('ollama_cloud')"><span class="slider"></span></label></h3>
|
||||||
|
<div class="row"><label>API Key</label><input id="ollama_cloud-key" type="password" placeholder="Ollama API key" onchange="updateProvider('ollama_cloud')">
|
||||||
|
<button class="btn btn-sm" onclick="toggleVis('ollama_cloud-key')">Show</button></div>
|
||||||
|
<div class="row"><label>Base URL</label><input id="ollama_cloud-url" value="https://ollama.com" onchange="updateProvider('ollama_cloud')"></div>
|
||||||
|
<div class="row"><label>Timeout (s)</label><input id="ollama_cloud-timeout" type="number" value="180" style="width:80px;flex:none" onchange="updateProvider('ollama_cloud')">
|
||||||
|
<button class="btn" onclick="testProvider('ollama_cloud')">Test</button></div>
|
||||||
|
</div>
|
||||||
<div class="card" id="prov-openrouter">
|
<div class="card" id="prov-openrouter">
|
||||||
<h3><div class="prov-dot" style="background:var(--blue)"></div> OpenRouter
|
<h3><div class="prov-dot" style="background:var(--blue)"></div> OpenRouter
|
||||||
<label class="toggle" style="margin-left:auto"><input type="checkbox" id="openrouter-enabled" onchange="updateProvider('openrouter')"><span class="slider"></span></label></h3>
|
<label class="toggle" style="margin-left:auto"><input type="checkbox" id="openrouter-enabled" onchange="updateProvider('openrouter')"><span class="slider"></span></label></h3>
|
||||||
@ -5016,7 +5065,7 @@ ADMIN_HTML = r"""
|
|||||||
</div>
|
</div>
|
||||||
<div id="add-cloud-modal" class="card" style="display:none;border-color:var(--accent)">
|
<div id="add-cloud-modal" class="card" style="display:none;border-color:var(--accent)">
|
||||||
<h3>Add Cloud Model</h3>
|
<h3>Add Cloud Model</h3>
|
||||||
<div class="row"><label>Provider</label><select id="add-cloud-prov"><option value="openrouter">OpenRouter</option><option value="openai">OpenAI</option><option value="anthropic">Anthropic</option></select></div>
|
<div class="row"><label>Provider</label><select id="add-cloud-prov"><option value="openrouter">OpenRouter</option><option value="ollama_cloud">Ollama Cloud</option><option value="openai">OpenAI</option><option value="anthropic">Anthropic</option></select></div>
|
||||||
<div class="row"><label>Model ID</label><input id="add-cloud-id" placeholder="e.g. meta-llama/llama-3-8b-instruct:free"></div>
|
<div class="row"><label>Model ID</label><input id="add-cloud-id" placeholder="e.g. meta-llama/llama-3-8b-instruct:free"></div>
|
||||||
<div class="row"><label>Display Name</label><input id="add-cloud-name" placeholder="e.g. Llama 3 8B Free"></div>
|
<div class="row"><label>Display Name</label><input id="add-cloud-name" placeholder="e.g. Llama 3 8B Free"></div>
|
||||||
<div class="row" style="justify-content:flex-end;gap:6px">
|
<div class="row" style="justify-content:flex-end;gap:6px">
|
||||||
@ -5026,11 +5075,27 @@ ADMIN_HTML = r"""
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- OLLAMA CLOUD TAB -->
|
||||||
|
<div id="tab-ollama_cloud" class="tab-content">
|
||||||
|
<div class="card">
|
||||||
|
<h3>Models on Ollama Cloud <button class="btn btn-primary" style="margin-left:auto" onclick="fetchOCModels()">Pull Models</button></h3>
|
||||||
|
<input class="search-input" id="oc-search" placeholder="Search models..." oninput="filterOC()">
|
||||||
|
<div class="or-list" id="oc-model-list"><div class="empty">Click "Pull Models" to load available models from ollama.com</div></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- OPENROUTER TAB -->
|
<!-- OPENROUTER TAB -->
|
||||||
<div id="tab-openrouter" class="tab-content">
|
<div id="tab-openrouter" class="tab-content">
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h3>Free Models on OpenRouter <button class="btn btn-primary" style="margin-left:auto" onclick="fetchORModels()">Fetch Models</button></h3>
|
<h3>Models on OpenRouter <button class="btn btn-primary" style="margin-left:auto" onclick="fetchORModels()">Fetch Models</button></h3>
|
||||||
<input class="search-input" id="or-search" placeholder="Search models..." oninput="filterOR()">
|
<div style="display:flex;gap:8px;margin-bottom:8px;align-items:center">
|
||||||
|
<input class="search-input" id="or-search" placeholder="Search models..." oninput="filterOR()" style="margin-bottom:0;flex:1">
|
||||||
|
<select id="or-filter" onchange="filterOR()" style="padding:8px;background:var(--card);border:1px solid var(--border);border-radius:6px;color:var(--text1);font-size:12px">
|
||||||
|
<option value="all">All Models</option>
|
||||||
|
<option value="free">Free Only</option>
|
||||||
|
<option value="paid">Paid Only</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
<div class="or-list" id="or-model-list"><div class="empty">Click "Fetch Models" to load the list.</div></div>
|
<div class="or-list" id="or-model-list"><div class="empty">Click "Fetch Models" to load the list.</div></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -5301,21 +5366,50 @@ async function fetchORModels() {
|
|||||||
|
|
||||||
// Render the fetched OpenRouter catalogue (orModels) into #or-model-list,
// applying the search box (#or-search) and the tier selector (#or-filter:
// all / free / paid). Rows are built with DOM APIs and textContent instead of
// HTML strings, so model names and ids are never parsed as markup.
function renderORModels() {
  const q = (document.getElementById('or-search').value || '').toLowerCase();
  const tier = document.getElementById('or-filter').value;
  let filtered = orModels;
  if (q) {
    // String(... || '') keeps one entry without a name/id from aborting the render.
    filtered = filtered.filter(m =>
      String(m.name || '').toLowerCase().includes(q) ||
      String(m.id || '').toLowerCase().includes(q));
  }
  if (tier === 'free') filtered = filtered.filter(m => m.free);
  if (tier === 'paid') filtered = filtered.filter(m => !m.free);

  const el = document.getElementById('or-model-list');
  if (!filtered.length) { el.textContent = 'No models found.'; return; }

  // Ids already present in the saved cloud model list, used to show "Added".
  const existing = new Set((config.cloud_models || []).map(m => m.id));
  el.textContent = '';
  filtered.forEach(function(m) {
    const added = existing.has('openrouter::' + m.id);
    const ctx = m.context_length ? (m.context_length / 1000).toFixed(0) + 'K' : '?';

    const row = document.createElement('div');
    row.className = 'model-row';

    const nameEl = document.createElement('span');
    nameEl.className = 'name';
    nameEl.textContent = m.name;

    const meta = document.createElement('span');
    meta.className = 'meta';
    if (m.free) {
      meta.textContent = ctx + ' ctx · free';
      meta.style.color = 'var(--green)';
    } else {
      // prompt_cost is treated as a per-token price; omit the price instead of
      // printing "$NaN/M tok" when it is missing or not numeric.
      const perMillion = Number(m.prompt_cost) * 1e6;
      meta.textContent = Number.isFinite(perMillion)
        ? ctx + ' ctx · $' + perMillion.toFixed(2) + '/M tok'
        : ctx + ' ctx';
    }

    const btn = document.createElement('button');
    if (added) {
      btn.className = 'btn btn-sm';
      btn.disabled = true;
      btn.style.opacity = '0.4';
      btn.textContent = 'Added';
    } else {
      btn.className = 'btn btn-sm btn-green';
      btn.textContent = 'Add';
      btn.onclick = function() { addOR(m.id, m.name); };
    }

    row.appendChild(nameEl);
    row.appendChild(meta);
    row.appendChild(btn);
    el.appendChild(row);
  });
}
|
||||||
|
|
||||||
function filterOR() { renderORModels(); }
|
// Input/change handler for #or-search and #or-filter: re-renders the
// OpenRouter list, which re-reads both controls itself.
function filterOR() { renderORModels(); }
|
||||||
@ -5328,6 +5422,62 @@ async function addOR(id, name) {
|
|||||||
toast('Added: ' + name);
|
toast('Added: ' + name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── Ollama Cloud model fetcher ───
|
||||||
|
// Model list most recently returned by /api/admin/ollama-cloud/models;
// filtered client-side by renderOCModels().
let ocModels = [];
|
||||||
|
// Load the Ollama Cloud model catalogue from the admin API into ocModels and
// render it. Any failure (network error, non-JSON body, or an `error` field in
// the payload) is shown inside #oc-model-list instead of leaving the
// "Fetching..." placeholder on screen.
async function fetchOCModels() {
  const el = document.getElementById('oc-model-list');
  el.textContent = 'Fetching from ollama.com...';
  let data;
  try {
    const r = await fetch('/api/admin/ollama-cloud/models');
    // `|| {}` guards against a literal `null` JSON body.
    data = (await r.json()) || {};
  } catch (err) {
    ocModels = [];
    el.textContent = 'Error: ' + (err && err.message ? err.message : err);
    return;
  }
  // Assigned before the error check so a failed fetch also clears stale results.
  ocModels = Array.isArray(data.models) ? data.models : [];
  if (data.error) { el.textContent = 'Error: ' + data.error; return; }
  renderOCModels();
}
|
||||||
|
// Render ocModels into #oc-model-list, filtered by the #oc-search text
// (matched against model name and id, like the OpenRouter list). Rows are
// built with DOM APIs and textContent so remote strings are never parsed as
// markup. Models already in config.cloud_models get a disabled "Added" button.
function renderOCModels() {
  const q = (document.getElementById('oc-search').value || '').toLowerCase();
  // String(... || '') keeps one entry without a name/id from aborting the render.
  const matches = m =>
    String(m.name || '').toLowerCase().includes(q) ||
    String(m.id || '').toLowerCase().includes(q);
  const filtered = q ? ocModels.filter(matches) : ocModels;

  const el = document.getElementById('oc-model-list');
  if (!filtered.length) { el.textContent = 'No models found.'; return; }

  const existing = new Set((config.cloud_models || []).map(m => m.id));
  el.textContent = '';
  filtered.forEach(function(m) {
    const added = existing.has('ollama_cloud::' + m.id);

    const row = document.createElement('div');
    row.className = 'model-row';

    const nameEl = document.createElement('span');
    nameEl.className = 'name';
    nameEl.textContent = m.name || m.id || '';

    const meta = document.createElement('span');
    meta.className = 'meta';
    // Avoid rendering "undefinedGB" when the backend omits the size.
    meta.textContent = (m.size_gb !== undefined && m.size_gb !== null) ? m.size_gb + 'GB' : '';

    const btn = document.createElement('button');
    if (added) {
      btn.className = 'btn btn-sm';
      btn.disabled = true;
      btn.style.opacity = '0.4';
      btn.textContent = 'Added';
    } else {
      btn.className = 'btn btn-sm btn-green';
      btn.textContent = 'Add';
      btn.onclick = function() { addOC(m.id, m.name); };
    }

    row.appendChild(nameEl);
    row.appendChild(meta);
    row.appendChild(btn);
    el.appendChild(row);
  });
}
|
||||||
|
// Input handler for #oc-search: re-renders the Ollama Cloud list, which
// re-reads the search box itself.
function filterOC() { renderOCModels(); }
|
||||||
|
// Add an Ollama Cloud model to config.cloud_models (id prefixed with
// "ollama_cloud::"), persist the list via saveCloudModels(), and refresh the
// list so the row switches to "Added".
async function addOC(id, name) {
  const fullId = 'ollama_cloud::' + id;
  config.cloud_models = config.cloud_models || [];
  // A second click before the re-render must not store the same model twice.
  if (config.cloud_models.some(m => m.id === fullId)) {
    renderOCModels();
    return;
  }
  config.cloud_models.push({id: fullId, display_name: 'Ollama: ' + name, enabled: true});
  await saveCloudModels();
  renderOCModels();
  toast('Added: ' + name);
}
|
||||||
|
|
||||||
async function saveTimeouts() {
|
async function saveTimeouts() {
|
||||||
var g = parseInt(document.getElementById('global-timeout').value) || 300;
|
var g = parseInt(document.getElementById('global-timeout').value) || 300;
|
||||||
config.timeouts = config.timeouts || {};
|
config.timeouts = config.timeouts || {};
|
||||||
@ -6493,6 +6643,26 @@ def query_ollama(model, prompt, timeout):
|
|||||||
return resp.json()["response"]
|
return resp.json()["response"]
|
||||||
|
|
||||||
|
|
||||||
|
def query_ollama_cloud(model, prompt, timeout):
    """Send a single-turn chat request to Ollama Cloud and return the reply text.

    Posts to the ``/api/chat`` endpoint of the configured ``ollama_cloud``
    provider with ``stream`` disabled, using the key from
    ``get_api_key("ollama_cloud")`` as a bearer token.

    Args:
        model: Model name to put in the request payload.
        prompt: User prompt. When its estimated token count leaves fewer than
            512 tokens below the model's context limit, it is shortened with
            ``smart_truncate`` before sending.
        timeout: Timeout passed to ``requests.post``.

    Returns:
        The ``message.content`` value of the JSON response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (via ``raise_for_status``).
    """
    cfg = load_config()
    prov = cfg["providers"].get("ollama_cloud", {})
    # `or` rather than a .get() default: a saved-but-empty base_url must still
    # fall back, otherwise the request URL has no scheme. rstrip avoids a
    # doubled slash ("//api/chat") when the configured URL ends with "/".
    base = (prov.get("base_url") or "https://ollama.com").rstrip("/")

    headers = {"Content-Type": "application/json"}
    api_key = get_api_key("ollama_cloud")
    if api_key:
        # Only attach credentials when a key exists; "Bearer " with an empty
        # token is a malformed header.
        headers["Authorization"] = f"Bearer {api_key}"

    prompt_tokens = estimate_tokens(prompt)
    ctx_limit = get_context_limit(model)
    # Request room for the prompt plus 1024 tokens, at least 2048, but never
    # more than the model's context limit.
    num_ctx = min(max(prompt_tokens + 1024, 2048), ctx_limit)
    if prompt_tokens > ctx_limit - 512:
        prompt = smart_truncate(prompt, ctx_limit - 512)

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "options": {"num_ctx": num_ctx},
    }
    resp = requests.post(f"{base}/api/chat", headers=headers, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["message"]["content"]
|
||||||
|
|
||||||
|
|
||||||
# ─── MODEL RATE-LIMIT TIMEOUT SYSTEM ─────────────────────────
|
# ─── MODEL RATE-LIMIT TIMEOUT SYSTEM ─────────────────────────
|
||||||
# Models that get 429'd are auto-disabled until admin re-enables them.
|
# Models that get 429'd are auto-disabled until admin re-enables them.
|
||||||
_model_rate_limited = {} # model_id -> {"since": timestamp, "reason": str, "count": int}
|
_model_rate_limited = {} # model_id -> {"since": timestamp, "reason": str, "count": int}
|
||||||
@ -6573,6 +6743,8 @@ def query_model(model_id, prompt):
|
|||||||
provider_name, model_name = model_id.split("::", 1)
|
provider_name, model_name = model_id.split("::", 1)
|
||||||
if provider_name == "anthropic":
|
if provider_name == "anthropic":
|
||||||
return query_anthropic(model_name, prompt, timeout)
|
return query_anthropic(model_name, prompt, timeout)
|
||||||
|
if provider_name == "ollama_cloud":
|
||||||
|
return query_ollama_cloud(model_name, prompt, timeout)
|
||||||
return query_openai_compatible(model_name, prompt, provider_name, timeout)
|
return query_openai_compatible(model_name, prompt, provider_name, timeout)
|
||||||
return query_ollama(model_id, prompt, timeout)
|
return query_ollama(model_id, prompt, timeout)
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
@ -7063,6 +7235,12 @@ def admin_test_provider():
|
|||||||
r = requests.get(f"{prov.get('base_url', 'http://localhost:11434')}/api/tags", timeout=5)
|
r = requests.get(f"{prov.get('base_url', 'http://localhost:11434')}/api/tags", timeout=5)
|
||||||
count = len(r.json().get("models", []))
|
count = len(r.json().get("models", []))
|
||||||
return jsonify({"ok": True, "message": f"Connected. {count} models available."})
|
return jsonify({"ok": True, "message": f"Connected. {count} models available."})
|
||||||
|
elif name == "ollama_cloud":
|
||||||
|
key = data.get("api_key") or get_api_key("ollama_cloud")
|
||||||
|
base = prov.get("base_url", "https://ollama.com")
|
||||||
|
r = requests.get(f"{base}/api/tags", headers={"Authorization": f"Bearer {key}"}, timeout=10)
|
||||||
|
count = len(r.json().get("models", []))
|
||||||
|
return jsonify({"ok": True, "message": f"Connected to Ollama Cloud. {count} models available."})
|
||||||
elif name == "openrouter":
|
elif name == "openrouter":
|
||||||
key = data.get("api_key") or get_api_key("openrouter")
|
key = data.get("api_key") or get_api_key("openrouter")
|
||||||
r = requests.get(f"{prov.get('base_url', 'https://openrouter.ai/api/v1')}/models",
|
r = requests.get(f"{prov.get('base_url', 'https://openrouter.ai/api/v1')}/models",
|
||||||
@ -7100,15 +7278,52 @@ def admin_openrouter_models():
|
|||||||
try:
|
try:
|
||||||
r = requests.get("https://openrouter.ai/api/v1/models", headers=headers, timeout=15)
|
r = requests.get("https://openrouter.ai/api/v1/models", headers=headers, timeout=15)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
free = []
|
models = []
|
||||||
for m in r.json().get("data", []):
|
for m in r.json().get("data", []):
|
||||||
pricing = m.get("pricing", {})
|
pricing = m.get("pricing", {})
|
||||||
if pricing.get("prompt") == "0" and pricing.get("completion") == "0":
|
prompt_cost = float(pricing.get("prompt", "0") or "0")
|
||||||
free.append({"id": m["id"], "name": m.get("name", m["id"]),
|
completion_cost = float(pricing.get("completion", "0") or "0")
|
||||||
"context_length": m.get("context_length", 0)})
|
is_free = prompt_cost == 0 and completion_cost == 0
|
||||||
_or_models_cache["data"] = free
|
models.append({
|
||||||
|
"id": m["id"], "name": m.get("name", m["id"]),
|
||||||
|
"context_length": m.get("context_length", 0),
|
||||||
|
"free": is_free,
|
||||||
|
"prompt_cost": prompt_cost,
|
||||||
|
"completion_cost": completion_cost,
|
||||||
|
})
|
||||||
|
_or_models_cache["data"] = models
|
||||||
_or_models_cache["ts"] = now
|
_or_models_cache["ts"] = now
|
||||||
return jsonify({"models": free})
|
return jsonify({"models": models})
|
||||||
|
except Exception as e:
|
||||||
|
return jsonify({"models": [], "error": str(e)})
|
||||||
|
|
||||||
|
|
||||||
|
_oc_models_cache = {"data": None, "ts": 0}
|
||||||
|
|
||||||
|
@app.route("/api/admin/ollama-cloud/models")
@admin_required
def admin_ollama_cloud_models():
    """Return the model list reported by the configured Ollama Cloud endpoint.

    A non-empty result is kept in ``_oc_models_cache`` and reused for
    300 seconds before the endpoint is queried again.

    Returns:
        A JSON response ``{"models": [...]}`` where each entry has ``id``,
        ``name``, ``size_gb`` and ``modified`` (first 10 characters of the
        reported ``modified_at`` value). On any failure the response is
        ``{"models": [], "error": "<message>"}``.
    """
    import time

    now = time.time()
    if _oc_models_cache["data"] and now - _oc_models_cache["ts"] < 300:
        return jsonify({"models": _oc_models_cache["data"]})

    cfg = load_config()
    prov = cfg["providers"].get("ollama_cloud", {})
    # Fall back to the public endpoint when the setting is missing or blank,
    # and drop a trailing slash so the joined URL never contains "//api".
    base = (prov.get("base_url") or "https://ollama.com").rstrip("/")
    key = get_api_key("ollama_cloud")
    headers = {"Authorization": f"Bearer {key}"} if key else {}
    try:
        r = requests.get(f"{base}/api/tags", headers=headers, timeout=15)
        r.raise_for_status()
        models = []
        for m in r.json().get("models", []):
            # Use `or` fallbacks so an explicit JSON null in one field does
            # not raise and discard the whole listing.
            name = m.get("name") or ""
            size_gb = round((m.get("size") or 0) / 1e9, 1)
            modified = (m.get("modified_at") or "")[:10]
            models.append({"id": name, "name": name, "size_gb": size_gb,
                           "modified": modified})
        _oc_models_cache["data"] = models
        _oc_models_cache["ts"] = now
        return jsonify({"models": models})
    except Exception as e:
        # Endpoint boundary: report the failure to the admin UI as JSON
        # rather than letting the request fail.
        return jsonify({"models": [], "error": str(e)})
|
||||||
|
|
||||||
@ -10401,7 +10616,7 @@ def run_team():
|
|||||||
"staircase": run_staircase, "drift": run_drift, "mesh": run_mesh,
|
"staircase": run_staircase, "drift": run_drift, "mesh": run_mesh,
|
||||||
"hallucination": run_hallucination, "timeloop": run_timeloop,
|
"hallucination": run_hallucination, "timeloop": run_timeloop,
|
||||||
"research": run_research, "eval": run_eval, "extract": run_extract,
|
"research": run_research, "eval": run_eval, "extract": run_extract,
|
||||||
"refine": run_refine, "adaptive": run_adaptive,
|
"refine": run_refine, "adaptive": run_adaptive, "deep_analysis": run_deep_analysis,
|
||||||
}
|
}
|
||||||
|
|
||||||
run_id = str(_uuid.uuid4())[:8]
|
run_id = str(_uuid.uuid4())[:8]
|
||||||
@ -12076,6 +12291,163 @@ def run_adaptive(config):
|
|||||||
f"Knowledge base: {'updated' if best_score is None or best_score >= score_threshold else 'not stored (below threshold)'}"
|
f"Knowledge base: {'updated' if best_score is None or best_score >= score_threshold else 'not stored (below threshold)'}"
|
||||||
)
|
)
|
||||||
yield sse({"type": "response", "model": "system", "text": summary, "role": "summary"})
|
yield sse({"type": "response", "model": "system", "text": summary, "role": "summary"})
|
||||||
|
|
||||||
|
|
||||||
|
def run_deep_analysis(config):
|
||||||
|
"""Deep Analysis: chains Research → Debate → Consensus → Adaptive scoring → Final synthesis.
|
||||||
|
Designed for cloud models — produces high-quality results that train the local knowledge base."""
|
||||||
|
import time as _time
|
||||||
|
start = _time.time()
|
||||||
|
prompt = config["prompt"]
|
||||||
|
models = config.get("models", [])
|
||||||
|
synthesizer = config.get("synthesizer", models[0] if models else "")
|
||||||
|
if len(models) < 2:
|
||||||
|
yield sse({"type": "response", "model": "system", "text": "Deep Analysis requires at least 2 models. Select your strongest cloud models.", "role": "error"})
|
||||||
|
return
|
||||||
|
|
||||||
|
yield sse({"type": "clear"})
|
||||||
|
all_outputs = {}
|
||||||
|
phase_times = {}
|
||||||
|
|
||||||
|
# ═══ PHASE 1: Multi-model Research ═══
|
||||||
|
yield sse({"type": "progress", "step": 1, "total_steps": 6, "substep": "Phase 1: Researching with all models...", "percent": 5})
|
||||||
|
yield sse({"type": "status", "message": "Phase 1/6: Research"})
|
||||||
|
research_prompt = (
|
||||||
|
f"You are a senior research analyst. Provide a thorough, well-structured response to this question. "
|
||||||
|
f"Include relevant context, consider multiple angles, cite your reasoning, and identify what you're uncertain about.\n\n"
|
||||||
|
f"QUESTION:\n{prompt}"
|
||||||
|
)
|
||||||
|
research_results = {}
|
||||||
|
p1_start = _time.time()
|
||||||
|
for i, model in enumerate(models):
|
||||||
|
pct = 5 + int((i / len(models)) * 15)
|
||||||
|
yield sse({"type": "progress", "step": 1, "total_steps": 6, "substep": f"Researching: {model}...", "percent": pct})
|
||||||
|
try:
|
||||||
|
result = safe_query(model, research_prompt)
|
||||||
|
research_results[model] = result
|
||||||
|
yield sse({"type": "response", "model": model, "text": result, "role": "researcher"})
|
||||||
|
except Exception as e:
|
||||||
|
yield sse({"type": "response", "model": model, "text": f"Error: {e}", "role": "error"})
|
||||||
|
phase_times["research"] = int((_time.time() - p1_start) * 1000)
|
||||||
|
all_outputs["research"] = research_results
|
||||||
|
|
||||||
|
if not research_results:
|
||||||
|
yield sse({"type": "response", "model": "system", "text": "All models failed in research phase.", "role": "error"})
|
||||||
|
return
|
||||||
|
|
||||||
|
# ═══ PHASE 2: Critical Debate ═══
|
||||||
|
yield sse({"type": "progress", "step": 2, "total_steps": 6, "substep": "Phase 2: Challenging findings...", "percent": 25})
|
||||||
|
yield sse({"type": "status", "message": "Phase 2/6: Debate"})
|
||||||
|
combined_research = "\n\n---\n\n".join([f"[{m}]:\n{r[:2000]}" for m, r in research_results.items()])
|
||||||
|
debate_prompt = (
|
||||||
|
f"You are a critical analyst. Multiple researchers have responded to a question. "
|
||||||
|
f"Challenge their findings. What are the weak points? What assumptions are being made? "
|
||||||
|
f"What alternative perspectives exist? What's missing?\n\n"
|
||||||
|
f"ORIGINAL QUESTION:\n{prompt}\n\n"
|
||||||
|
f"RESEARCH RESPONSES:\n{combined_research[:6000]}"
|
||||||
|
)
|
||||||
|
# Use 2 models for debate — different perspectives
|
||||||
|
debaters = models[:2] if len(models) >= 2 else models
|
||||||
|
debate_results = {}
|
||||||
|
p2_start = _time.time()
|
||||||
|
for model in debaters:
|
||||||
|
yield sse({"type": "progress", "step": 2, "total_steps": 6, "substep": f"Debating: {model}...", "percent": 30})
|
||||||
|
try:
|
||||||
|
result = safe_query(model, debate_prompt)
|
||||||
|
debate_results[model] = result
|
||||||
|
yield sse({"type": "response", "model": model, "text": result, "role": "critic"})
|
||||||
|
except Exception as e:
|
||||||
|
yield sse({"type": "response", "model": model, "text": f"Error: {e}", "role": "error"})
|
||||||
|
phase_times["debate"] = int((_time.time() - p2_start) * 1000)
|
||||||
|
all_outputs["debate"] = debate_results
|
||||||
|
|
||||||
|
# ═══ PHASE 3: Consensus Building ═══
|
||||||
|
yield sse({"type": "progress", "step": 3, "total_steps": 6, "substep": "Phase 3: Building consensus...", "percent": 45})
|
||||||
|
yield sse({"type": "status", "message": "Phase 3/6: Consensus"})
|
||||||
|
combined_debate = "\n\n---\n\n".join([f"[{m}]:\n{r[:2000]}" for m, r in debate_results.items()])
|
||||||
|
consensus_prompt = (
|
||||||
|
f"You are synthesizing research findings with critical analysis. "
|
||||||
|
f"Merge the research with the critiques. For each major point, state: "
|
||||||
|
f"(1) what's strongly supported, (2) what's contested, (3) what needs more investigation.\n\n"
|
||||||
|
f"ORIGINAL QUESTION:\n{prompt}\n\n"
|
||||||
|
f"RESEARCH:\n{combined_research[:3000]}\n\n"
|
||||||
|
f"CRITIQUES:\n{combined_debate[:3000]}"
|
||||||
|
)
|
||||||
|
p3_start = _time.time()
|
||||||
|
consensus_model = models[len(models) // 2] if len(models) > 2 else models[-1]
|
||||||
|
try:
|
||||||
|
consensus = safe_query(consensus_model, consensus_prompt)
|
||||||
|
yield sse({"type": "response", "model": consensus_model, "text": consensus, "role": "consensus"})
|
||||||
|
except Exception as e:
|
||||||
|
consensus = combined_research[:3000]
|
||||||
|
yield sse({"type": "response", "model": "system", "text": f"Consensus error, using raw research: {e}", "role": "error"})
|
||||||
|
phase_times["consensus"] = int((_time.time() - p3_start) * 1000)
|
||||||
|
all_outputs["consensus"] = consensus
|
||||||
|
|
||||||
|
# ═══ PHASE 4: Self-Evaluation ═══
|
||||||
|
yield sse({"type": "progress", "step": 4, "total_steps": 6, "substep": "Phase 4: Self-evaluation...", "percent": 60})
|
||||||
|
yield sse({"type": "status", "message": "Phase 4/6: Self-eval"})
|
||||||
|
eval_prompt = (
|
||||||
|
f"Rate the following analysis on a scale of 1-10 for: accuracy, completeness, actionability, and nuance. "
|
||||||
|
f"Return JSON: {{\"scores\": {{\"accuracy\": N, \"completeness\": N, \"actionability\": N, \"nuance\": N}}, \"overall\": N, \"strengths\": \"...\", \"gaps\": \"...\"}}\n\n"
|
||||||
|
f"QUESTION:\n{prompt[:500]}\n\nANALYSIS:\n{consensus[:4000]}"
|
||||||
|
)
|
||||||
|
p4_start = _time.time()
|
||||||
|
eval_result = {"overall": 0}
|
||||||
|
try:
|
||||||
|
eval_raw = safe_query(synthesizer, eval_prompt)
|
||||||
|
j_s, j_e = eval_raw.find("{"), eval_raw.rfind("}") + 1
|
||||||
|
if j_s >= 0 and j_e > j_s:
|
||||||
|
eval_result = json.loads(eval_raw[j_s:j_e])
|
||||||
|
yield sse({"type": "response", "model": synthesizer, "text": eval_raw, "role": "evaluator"})
|
||||||
|
except Exception as e:
|
||||||
|
yield sse({"type": "response", "model": "system", "text": f"Eval error: {e}", "role": "error"})
|
||||||
|
phase_times["evaluation"] = int((_time.time() - p4_start) * 1000)
|
||||||
|
|
||||||
|
# ═══ PHASE 5: Final Synthesis ═══
|
||||||
|
yield sse({"type": "progress", "step": 5, "total_steps": 6, "substep": "Phase 5: Final synthesis by strongest model...", "percent": 75})
|
||||||
|
yield sse({"type": "status", "message": "Phase 5/6: Final synthesis"})
|
||||||
|
gaps = eval_result.get("gaps", "")
|
||||||
|
synth_prompt = (
|
||||||
|
f"You are producing the definitive response to a question that has been researched by multiple models, "
|
||||||
|
f"critically debated, and evaluated. Produce the best possible answer.\n\n"
|
||||||
|
f"ORIGINAL QUESTION:\n{prompt}\n\n"
|
||||||
|
f"CONSENSUS ANALYSIS:\n{consensus[:4000]}\n\n"
|
||||||
|
+ (f"IDENTIFIED GAPS TO ADDRESS:\n{gaps}\n\n" if gaps else "")
|
||||||
|
+ f"Produce a comprehensive, well-structured final answer. Be specific and actionable."
|
||||||
|
)
|
||||||
|
p5_start = _time.time()
|
||||||
|
try:
|
||||||
|
final = safe_query(synthesizer, synth_prompt)
|
||||||
|
yield sse({"type": "response", "model": synthesizer, "text": final, "role": "final"})
|
||||||
|
except Exception as e:
|
||||||
|
final = consensus
|
||||||
|
yield sse({"type": "response", "model": "system", "text": f"Synthesis error, using consensus: {e}", "role": "error"})
|
||||||
|
phase_times["synthesis"] = int((_time.time() - p5_start) * 1000)
|
||||||
|
|
||||||
|
# ═══ PHASE 6: Store in Knowledge Base ═══
|
||||||
|
yield sse({"type": "progress", "step": 6, "total_steps": 6, "substep": "Phase 6: Storing in knowledge base...", "percent": 95})
|
||||||
|
yield sse({"type": "status", "message": "Phase 6/6: Knowledge base"})
|
||||||
|
overall_score = eval_result.get("overall", 7)
|
||||||
|
_kb_store(prompt, final, "deep_analysis", synthesizer, overall_score, 0.9)
|
||||||
|
yield sse({"type": "response", "model": "system",
|
||||||
|
"text": f"Final response stored in knowledge base (score: {overall_score}/10). Local models will benefit from this on future similar queries.",
|
||||||
|
"role": "notice"})
|
||||||
|
|
||||||
|
# Summary
|
||||||
|
total_ms = int((_time.time() - start) * 1000)
|
||||||
|
model_list = ", ".join(models)
|
||||||
|
time_breakdown = " → ".join([f"{k}: {v}ms" for k, v in phase_times.items()])
|
||||||
|
summary = (
|
||||||
|
f"Deep Analysis complete in {total_ms}ms\n"
|
||||||
|
f"Pipeline: Research → Debate → Consensus → Eval → Synthesis\n"
|
||||||
|
f"Models: {model_list}\n"
|
||||||
|
f"Synthesizer: {synthesizer}\n"
|
||||||
|
f"Quality: {overall_score}/10\n"
|
||||||
|
f"Phases: {time_breakdown}\n"
|
||||||
|
f"Knowledge base updated — future adaptive runs on similar topics will use this result"
|
||||||
|
)
|
||||||
|
yield sse({"type": "response", "model": "system", "text": summary, "role": "summary"})
|
||||||
yield sse({"type": "progress", "step": 4, "total_steps": 4, "substep": "Complete", "percent": 100})
|
yield sse({"type": "progress", "step": 4, "total_steps": 4, "substep": "Complete", "percent": 100})
|
||||||
|
|
||||||
# Save adaptive run log
|
# Save adaptive run log
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user