Phase 39: Provider Adapter Refactor

- ProviderAdapter trait with chat(), embed(), unload(), health()
- OllamaAdapter wrapping existing AiClient
- OpenRouterAdapter for openrouter.ai API integration
- provider_key() routing by model prefix (openrouter/*, etc)
This commit is contained in:
root 2026-04-23 02:24:15 -05:00
parent e2ccddd8d2
commit e27a17e950
7 changed files with 236 additions and 0 deletions

View File

@ -11,3 +11,4 @@ serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
async-trait = "0.1"

View File

@ -1,5 +1,7 @@
pub mod client;
pub mod context;
pub mod continuation;
pub mod provider;
pub mod providers;
pub mod service;
pub mod tree_split;

View File

@ -0,0 +1,39 @@
use async_trait::async_trait;
use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
/// Abstraction over a model backend (local Ollama, hosted OpenRouter, ...).
///
/// `Send + Sync` is required so one adapter instance can be shared across
/// async tasks; `#[async_trait]` is needed because the methods are `async`.
#[async_trait]
pub trait ProviderAdapter: Send + Sync {
    /// Name for routing (ollama, openrouter, etc.) — should agree with the
    /// keys produced by `provider_key()`.
    fn name(&self) -> &str;

    /// Chat completion — returns text, model, token counts.
    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String>;

    /// Embeddings — returns vectors, model, dimensions.
    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String>;

    /// Unload model from VRAM. Default implementation is a no-op for
    /// providers (e.g. hosted APIs) that have nothing local to unload.
    async fn unload(&self, _model: &str) -> Result<(), String> {
        Ok(())
    }

    /// Health check — provider-specific JSON status payload.
    async fn health(&self) -> Result<serde_json::Value, String>;
}
/// Routing key extracted from the model name (case-insensitive prefix match).
///
/// All four outcomes (the previous doc omitted the `gemini`/`claude` branches):
/// - `"openrouter/anthropic/claude-3.5-sonnet"` → `"openrouter"`
/// - `"gemini-2.0-flash"` → `"gemini"`
/// - `"claude-3-haiku"` → `"claude"`
/// - `"qwen3.5:latest"`, `"gpt-4o"`, anything else → `"ollama"` (default: local Ollama)
///
/// Note: `claude` models reached *via* OpenRouter keep their `openrouter/`
/// prefix and therefore route to `"openrouter"`, not `"claude"`.
pub fn provider_key(model: &str) -> &'static str {
    // Normalize once so every prefix check below is case-insensitive.
    let lower = model.to_lowercase();
    if lower.starts_with("openrouter/") {
        "openrouter"
    } else if lower.starts_with("gemini") {
        "gemini"
    } else if lower.starts_with("claude") {
        "claude"
    } else {
        "ollama" // default: local Ollama
    }
}

View File

@ -0,0 +1,2 @@
pub mod ollama;
pub mod openrouter;

View File

@ -0,0 +1,37 @@
use async_trait::async_trait;
use crate::client::{AiClient, GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
use crate::provider::ProviderAdapter;
/// Adapter for the local Ollama backend — a thin wrapper delegating to the
/// existing `AiClient`.
pub struct OllamaAdapter {
    client: AiClient,
}

impl OllamaAdapter {
    /// Wrap an existing, already-configured `AiClient`.
    pub fn new(client: AiClient) -> Self {
        Self { client }
    }
}
#[async_trait]
impl ProviderAdapter for OllamaAdapter {
    /// Routing key for this adapter.
    fn name(&self) -> &str {
        "ollama"
    }

    /// Delegate chat completion to the wrapped `AiClient`.
    async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
        self.client.generate(req).await
    }

    /// Delegate embeddings to the wrapped `AiClient`.
    async fn embed(&self, req: EmbedRequest) -> Result<EmbedResponse, String> {
        self.client.embed(req).await
    }

    /// Ask Ollama to evict the model from VRAM; the response body is discarded.
    async fn unload(&self, model: &str) -> Result<(), String> {
        // The annotation only pins the deserialization target; the value itself
        // is unused.
        let _ignored: serde_json::Value = self.client.unload_model(model).await?;
        Ok(())
    }

    /// Delegate the health probe to the wrapped `AiClient`.
    async fn health(&self) -> Result<serde_json::Value, String> {
        self.client.health().await
    }
}

View File

@ -0,0 +1,150 @@
use async_trait::async_trait;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use crate::client::{GenerateRequest, GenerateResponse, EmbedRequest, EmbedResponse};
use crate::provider::ProviderAdapter;
/// Adapter that speaks the OpenRouter (openrouter.ai) chat-completions API.
pub struct OpenRouterAdapter {
    client: Client,        // shared reqwest client, built once in `new`
    base_url: String,      // API root, stored without a trailing slash
    api_key: String,       // sent as `Authorization: Bearer <key>`
    default_model: String, // fallback when a request names no usable model
}
#[derive(Serialize)]
struct OpenRouterChatRequest {
model: String,
messages: Vec<OpenRouterMessage>,
temperature: Option<f64>,
max_tokens: Option<u32>,
}
#[derive(Serialize)]
struct OpenRouterMessage {
role: String,
content: String,
}
#[derive(Deserialize)]
struct OpenRouterChatResponse {
choices: Vec<OpenRouterChoice>,
usage: OpenRouterUsage,
model: String,
}
#[derive(Deserialize)]
struct OpenRouterChoice {
message: OpenRouterMessageOut,
}
#[derive(Deserialize)]
struct OpenRouterMessageOut {
role: String,
content: String,
}
#[derive(Deserialize)]
struct OpenRouterUsage {
prompt_tokens: Option<u32>,
completion_tokens: Option<u32>,
total_tokens: Option<u32>,
}
impl OpenRouterAdapter {
    /// Build an adapter for `base_url` (trailing slashes stripped), using
    /// `default_model` whenever a request does not name a usable model.
    pub fn new(base_url: &str, api_key: String, default_model: &str) -> Self {
        let client = Client::builder()
            // Generous timeout: large completions can take minutes.
            .timeout(Duration::from_secs(180))
            .build()
            .expect("failed to build HTTP client"); // only fails on TLS/backend misconfig
        Self {
            client,
            base_url: base_url.trim_end_matches('/').to_string(),
            api_key,
            default_model: default_model.to_string(),
        }
    }

    /// Resolve the model name to send upstream.
    ///
    /// `"openrouter/vendor/model"` → `"vendor/model"`; anything without the
    /// prefix, or with an empty remainder, falls back to `default_model`.
    fn chat_model(&self, model: &str) -> String {
        // `strip_prefix` removes the prefix exactly once. The previous
        // `trim_start_matches` stripped it *repeatedly*, so a model literally
        // named "openrouter/x" could not be addressed as
        // "openrouter/openrouter/x".
        match model.strip_prefix("openrouter/") {
            Some(rest) if !rest.is_empty() => rest.to_string(),
            _ => self.default_model.clone(),
        }
    }

    /// Convert the internal request into OpenAI-style messages:
    /// optional system message first, then the user prompt.
    fn to_openrouter_messages(req: &GenerateRequest) -> Vec<OpenRouterMessage> {
        let mut out = Vec::new();
        if let Some(sys) = &req.system {
            out.push(OpenRouterMessage { role: "system".into(), content: sys.clone() });
        }
        out.push(OpenRouterMessage {
            role: "user".into(),
            content: req.prompt.clone(),
        });
        out
    }
}
#[async_trait]
impl ProviderAdapter for OpenRouterAdapter {
fn name(&self) -> &str {
"openrouter"
}
async fn chat(&self, req: GenerateRequest) -> Result<GenerateResponse, String> {
let model = self.chat_model(req.model.as_deref().unwrap_or(""));
let or_req = OpenRouterChatRequest {
model: model.clone(),
messages: OpenRouterAdapter::to_openrouter_messages(&req),
temperature: req.temperature,
max_tokens: req.max_tokens,
};
let resp = self.client
.post(format!("{}/chat/completions", self.base_url))
.header("Authorization", format!("Bearer {}", self.api_key))
.header("Content-Type", "application/json")
.json(&or_req)
.send()
.await
.map_err(|e| format!("openrouter request failed: {e}"))?;
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
if !status.is_success() {
return Err(format!("openrouter error ({}): {}", status, body));
}
let or_resp: OpenRouterChatResponse = serde_json::from_str(&body)
.map_err(|e| format!("openrouter parse error: {e}"))?;
let choice = or_resp.choices.into_iter().next()
.ok_or("no completion choice returned")?;
let usage = or_resp.usage;
Ok(GenerateResponse {
text: choice.message.content,
model: or_resp.model,
tokens_evaluated: usage.prompt_tokens.map(|n| n as u64),
tokens_generated: usage.completion_tokens.map(|n| n as u64),
})
}
async fn embed(&self, _req: EmbedRequest) -> Result<EmbedResponse, String> {
Err("openrouter: embed not implemented".into())
}
async fn health(&self) -> Result<serde_json::Value, String> {
// OpenRouter doesn't have a dedicated health endpoint,
// so we just return a healthy response if the client works.
Ok(serde_json::json!({
"status": "ok",
"provider": "openrouter",
}))
}
}

View File

@ -334,6 +334,11 @@
- `/v1/sessions` — returns `{data: [], note: "Phase 38: stateless"}`
- Langfuse trace integration (fire-and-forget, Phase 40 early)
- 12 unit tests green, curl gates pass
- [x] **Phase 39: Provider Adapter Refactor** (2026-04-23)
- `ProviderAdapter` trait with `chat()` + `embed()` + `unload()` + `health()`
- `OllamaAdapter` — wraps existing AiClient
- `OpenRouterAdapter` — HTTP client to openrouter.ai
  - `provider_key()` routing by model prefix (`openrouter/*` → OpenRouter; `gemini*`/`claude*` reserved keys; everything else → local Ollama)
- [ ] Fine-tuned domain models (Phase 25+)
- [ ] Multi-node query distribution (only if ceilings bite)