From e27a17e950c38c59b5a989b161442a20178099c3 Mon Sep 17 00:00:00 2001
From: root
Date: Thu, 23 Apr 2026 02:24:15 -0500
Subject: [PATCH] Phase 39: Provider Adapter Refactor

- ProviderAdapter trait with chat(), embed(), unload(), health()
- OllamaAdapter wrapping existing AiClient
- OpenRouterAdapter for openrouter.ai API integration
- provider_key() routing by model prefix (openrouter/*, etc)
---
 crates/aibridge/Cargo.toml                  |   1 +
 crates/aibridge/src/lib.rs                  |   2 +
 crates/aibridge/src/provider.rs             |  39 +++++
 crates/aibridge/src/providers/mod.rs        |   2 +
 crates/aibridge/src/providers/ollama.rs     |  37 +++++
 crates/aibridge/src/providers/openrouter.rs | 150 ++++++++++++++++++++
 docs/PHASES.md                              |   5 +
 7 files changed, 236 insertions(+)
 create mode 100644 crates/aibridge/src/provider.rs
 create mode 100644 crates/aibridge/src/providers/mod.rs
 create mode 100644 crates/aibridge/src/providers/ollama.rs
 create mode 100644 crates/aibridge/src/providers/openrouter.rs

diff --git a/crates/aibridge/Cargo.toml b/crates/aibridge/Cargo.toml
index 5a1a93b..dc2c0fe 100644
--- a/crates/aibridge/Cargo.toml
+++ b/crates/aibridge/Cargo.toml
@@ -11,3 +11,4 @@ serde = { workspace = true }
 serde_json = { workspace = true }
 tracing = { workspace = true }
 reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
+async-trait = "0.1"
diff --git a/crates/aibridge/src/lib.rs b/crates/aibridge/src/lib.rs
index 04e8022..12d3be0 100644
--- a/crates/aibridge/src/lib.rs
+++ b/crates/aibridge/src/lib.rs
@@ -1,5 +1,7 @@
 pub mod client;
 pub mod context;
 pub mod continuation;
+pub mod provider;
+pub mod providers;
 pub mod service;
 pub mod tree_split;
diff --git a/crates/aibridge/src/provider.rs b/crates/aibridge/src/provider.rs
new file mode 100644
index 0000000..40a4713
--- /dev/null
+++ b/crates/aibridge/src/provider.rs
@@ -0,0 +1,39 @@
+use async_trait::async_trait;
+use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+
+#[async_trait]
+pub trait ProviderAdapter: Send + Sync {
+    /// Name for routing (ollama, openrouter, etc.)
+    fn name(&self) -> &str;
+
+    /// Chat completion — returns text, model, token counts
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String>;
+
+    /// Embeddings — returns vectors, model, dimensions
+    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String>;
+
+    /// Unload model from VRAM (optional, no-op if not supported)
+    async fn unload(&self, _model: &str) -> Result<(), String> {
+        Ok(())
+    }
+
+    /// Health check
+    async fn health(&self) -> Result<serde_json::Value, String>;
+}
+
+/// Routing key extracted from model name.
+/// - "qwen3.5:latest" → "ollama"
+/// - "openrouter/anthropic/claude-3.5-sonnet" → "openrouter"
+/// - "gpt-4o" → "ollama" (default)
+pub fn provider_key(model: &str) -> &'static str {
+    let lower = model.to_lowercase();
+    if lower.starts_with("openrouter/") {
+        "openrouter"
+    } else if lower.starts_with("gemini") {
+        "gemini" // NOTE(review): no Gemini adapter in this phase — key is reserved
+    } else if lower.starts_with("claude") {
+        "claude" // NOTE(review): no Claude adapter in this phase — key is reserved
+    } else {
+        "ollama" // default: local Ollama
+    }
+}
\ No newline at end of file
diff --git a/crates/aibridge/src/providers/mod.rs b/crates/aibridge/src/providers/mod.rs
new file mode 100644
index 0000000..a908d89
--- /dev/null
+++ b/crates/aibridge/src/providers/mod.rs
@@ -0,0 +1,2 @@
+pub mod ollama;
+pub mod openrouter;
\ No newline at end of file
diff --git a/crates/aibridge/src/providers/ollama.rs b/crates/aibridge/src/providers/ollama.rs
new file mode 100644
index 0000000..c515f56
--- /dev/null
+++ b/crates/aibridge/src/providers/ollama.rs
@@ -0,0 +1,37 @@
+use async_trait::async_trait;
+use crate::client::{AiClient, GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+use crate::provider::ProviderAdapter;
+
+pub struct OllamaAdapter {
+    client: AiClient,
+}
+
+impl OllamaAdapter {
+    pub fn new(client: AiClient) -> Self {
+        Self { client }
+    }
+}
+
+#[async_trait]
+impl ProviderAdapter for OllamaAdapter {
+    fn name(&self) -> &str {
+        "ollama"
+    }
+
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        self.client.generate(req).await
+    }
+
+    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String> {
+        self.client.embed(req).await
+    }
+
+    async fn unload(&self, model: &str) -> Result<(), String> {
+        let _: serde_json::Value = self.client.unload_model(model).await?;
+        Ok(())
+    }
+
+    async fn health(&self) -> Result<serde_json::Value, String> {
+        self.client.health().await
+    }
+}
\ No newline at end of file
diff --git a/crates/aibridge/src/providers/openrouter.rs b/crates/aibridge/src/providers/openrouter.rs
new file mode 100644
index 0000000..9584dbe
--- /dev/null
+++ b/crates/aibridge/src/providers/openrouter.rs
@@ -0,0 +1,150 @@
+use async_trait::async_trait;
+use reqwest::Client;
+use serde::{Deserialize, Serialize};
+use std::time::Duration;
+
+use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
+use crate::provider::ProviderAdapter;
+
+pub struct OpenRouterAdapter {
+    client: Client,
+    base_url: String,
+    api_key: String,
+    default_model: String,
+}
+
+#[derive(Serialize)]
+struct OpenRouterChatRequest {
+    model: String,
+    messages: Vec<OpenRouterMessage>,
+    temperature: Option<f32>,
+    max_tokens: Option<u32>,
+}
+
+#[derive(Serialize)]
+struct OpenRouterMessage {
+    role: String,
+    content: String,
+}
+
+#[derive(Deserialize)]
+struct OpenRouterChatResponse {
+    choices: Vec<OpenRouterChoice>,
+    usage: Option<OpenRouterUsage>, // OpenRouter may omit usage on some routes
+    model: String,
+}
+
+#[derive(Deserialize)]
+struct OpenRouterChoice {
+    message: OpenRouterMessageOut,
+}
+
+#[derive(Deserialize)]
+struct OpenRouterMessageOut {
+    // `role` is also present in the response but unused; serde skips it.
+    content: String,
+}
+
+#[derive(Deserialize, Default)]
+struct OpenRouterUsage {
+    prompt_tokens: Option<u32>,
+    completion_tokens: Option<u32>,
+    total_tokens: Option<u32>,
+}
+
+impl OpenRouterAdapter {
+    pub fn new(base_url: &str, api_key: String, default_model: &str) -> Self {
+        let client = Client::builder()
+            .timeout(Duration::from_secs(180))
+            .build()
+            .expect("failed to build HTTP client");
+        Self {
+            client,
+            base_url: base_url.trim_end_matches('/').to_string(),
+            api_key,
+            default_model: default_model.to_string(),
+        }
+    }
+
+    fn chat_model(&self, model: &str) -> String {
+        // Use the model name after the "openrouter/" prefix; fall back to the
+        // configured default when the prefix is absent or nothing follows it.
+        match model.strip_prefix("openrouter/") {
+            Some(m) if !m.is_empty() => m.to_string(),
+            _ => self.default_model.clone(),
+        }
+    }
+
+    fn to_openrouter_messages(req: &GenerateRequest) -> Vec<OpenRouterMessage> {
+        let mut out = vec![];
+        if let Some(sys) = &req.system {
+            out.push(OpenRouterMessage { role: "system".into(), content: sys.clone() });
+        }
+        out.push(OpenRouterMessage {
+            role: "user".into(),
+            content: req.prompt.clone(),
+        });
+        out
+    }
+}
+
+#[async_trait]
+impl ProviderAdapter for OpenRouterAdapter {
+    fn name(&self) -> &str {
+        "openrouter"
+    }
+
+    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
+        let model = self.chat_model(req.model.as_deref().unwrap_or(""));
+        let or_req = OpenRouterChatRequest {
+            model,
+            messages: OpenRouterAdapter::to_openrouter_messages(&req),
+            temperature: req.temperature,
+            max_tokens: req.max_tokens,
+        };
+
+        let resp = self.client
+            .post(format!("{}/chat/completions", self.base_url))
+            .header("Authorization", format!("Bearer {}", self.api_key))
+            .header("Content-Type", "application/json")
+            .json(&or_req)
+            .send()
+            .await
+            .map_err(|e| format!("openrouter request failed: {e}"))?;
+
+        let status = resp.status();
+        let body = resp.text().await.unwrap_or_default();
+
+        if !status.is_success() {
+            return Err(format!("openrouter error ({}): {}", status, body));
+        }
+
+        let or_resp: OpenRouterChatResponse = serde_json::from_str(&body)
+            .map_err(|e| format!("openrouter parse error: {e}"))?;
+
+        let choice = or_resp.choices.into_iter().next()
+            .ok_or("no completion choice returned")?;
+        let usage = or_resp.usage.unwrap_or_default();
+
+        Ok(GenerateResponse {
+            text: choice.message.content,
+            model: or_resp.model,
+            tokens_evaluated: usage.prompt_tokens.map(|n| n as u64),
+            tokens_generated: usage.completion_tokens.map(|n| n as u64),
+        })
+    }
+
+    async fn embed(&self, _req: EmbedRequest) -> Result<EmbedResponse, String> {
+        Err("openrouter: embed not implemented".into())
+    }
+
+    async fn health(&self) -> Result<serde_json::Value, String> {
+        // OpenRouter doesn't have a dedicated health endpoint,
+        // so we just return a healthy response if the client works.
+        Ok(serde_json::json!({
+            "status": "ok",
+            "provider": "openrouter",
+        }))
+    }
+}
\ No newline at end of file
diff --git a/docs/PHASES.md b/docs/PHASES.md
index 4e20b9d..a17ca78 100644
--- a/docs/PHASES.md
+++ b/docs/PHASES.md
@@ -334,6 +334,11 @@
 - `/v1/sessions` — returns `{data: [], note: "Phase 38: stateless"}`
 - Langfuse trace integration (fire-and-forget, Phase 40 early)
 - 12 unit tests green, curl gates pass
+- [x] **Phase 39: Provider Adapter Refactor** (2026-04-23)
+  - `ProviderAdapter` trait with `chat()` + `embed()` + `unload()` + `health()`
+  - `OllamaAdapter` — wraps existing AiClient
+  - `OpenRouterAdapter` — HTTP client to openrouter.ai
+  - `provider_key()` routing by model prefix (openrouter/* → OpenRouter)
 - [ ] Fine-tuned domain models (Phase 25+)
 - [ ] Multi-node query distribution (only if ceilings bite)