2026-04-27 15:55:24 +00:00
1 changed files with 35 additions and 4 deletions
--- a/crates/gateway/src/main.rs
+++ b/crates/gateway/src/main.rs
@@ -95,8 +95,35 @@ async fn main() {
        tracing::warn!("workspace rebuild: {e}");
    }

-    // AI sidecar client
-    let ai_client = aibridge::client::AiClient::new(&config.sidecar.url);
+    // AI sidecar clients — Phase 44 part 3 (2026-04-27).
+    //
+    // Two flavors of the same client:
+    // - `ai_client_direct` posts directly to ${sidecar}/generate. Used
+    //   inside the gateway by V1State + the legacy /ai proxy. These
+    //   call sites are themselves the implementation of /v1/chat
+    //   (or its sidecar shim), so routing them through /v1/chat
+    //   would self-loop.
+    // - `ai_client_observable` posts via ${gateway}/v1/chat with
+    //   provider="ollama". Used by vectord modules (autotune agent,
+    //   /vectors service) so their LLM calls land in /v1/usage and
+    //   Langfuse traces. Adds one localhost HTTP hop per call (~ms);
+    //   accepted for the observability gain.
+    //
+    // Pointing a client at the gateway's own /v1/chat before the
+    // listener is bound is fine because the observable client is
+    // only configured here, not exercised: we don't fire any LLM
+    // calls until the listener is up.
+    let ai_client_direct = aibridge::client::AiClient::new(&config.sidecar.url);
+    let gateway_self_url = format!("http://{}:{}", config.gateway.host, config.gateway.port);
+    let ai_client_observable = aibridge::client::AiClient::new_with_gateway(
+        &config.sidecar.url,
+        &gateway_self_url,
+    );
+    // Backwards-compat alias for the (many) existing references in this file.
+    // Bound to the direct client so the existing wiring (V1State, /ai proxy)
+    // keeps posting straight to the sidecar and cannot loop back through
+    // /v1/chat. New vectord wiring below explicitly uses ai_client_observable.
+    let ai_client = ai_client_direct.clone();

    // Vector service components — built before the router because both the
    // /vectors service AND ingestd need the agent handle to enqueue triggers.
@@ -134,7 +161,9 @@ async fn main() {
        agent_cfg,
        vectord::agent::AgentDeps {
            store: store.clone(),
-            ai_client: ai_client.clone(),
+            // Observable: autotune agent's LLM calls go through
+            // /v1/chat for /v1/usage + Langfuse visibility.
+            ai_client: ai_client_observable.clone(),
            catalog: registry.clone(),
            index_registry: index_reg.clone(),
            hnsw_store: hnsw.clone(),
@@ -189,7 +218,9 @@ async fn main() {
        }))
        .nest("/vectors", vectord::service::router(vectord::service::VectorState {
            store: store.clone(),
-            ai_client: ai_client.clone(),
+            // Observable: /vectors service's LLM calls (RAG, summary,
+            // playbook synthesis, etc.) flow through /v1/chat.
+            ai_client: ai_client_observable.clone(),
            job_tracker: vectord::jobs::JobTracker::new(),
            index_registry: index_reg.clone(),
            hnsw_store: hnsw,