The orchestrator process was hanging after completing its work because:
1. Fire-and-forget Redis operations in MessageBus.handleMessage() left
unhandled promises that kept the event loop alive
2. No explicit process.exit() call after cleanup
Changes:
- coordination.ts: Add .catch(() => {}) to fire-and-forget Redis ops
- orchestrator.ts: Add explicit process.exit(exitCode) after cleanup
- orchestrator.ts: Improve error handling in main() with proper exit codes
Tested: Pipeline mksup1wq completed full flow and exited cleanly.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
501 lines · 16 KiB · TypeScript
/**
 * Multi-Agent Coordination System - Blackboard & Messaging
 */
|
|
|
|
import { createClient, RedisClientType } from "redis";
|
|
import { $ } from "bun";
|
|
import type {
|
|
AgentRole,
|
|
AgentMessage,
|
|
BlackboardEntry,
|
|
BlackboardSection,
|
|
AgentState,
|
|
CoordinationMetrics,
|
|
SpawnCondition,
|
|
ConsensusVote,
|
|
} from "./types";
|
|
|
|
/**
 * Returns the current wall-clock time as an ISO-8601 UTC string, i.e. the
 * format produced by `Date.prototype.toISOString()`.
 */
function now(): string {
  return new Date().toISOString();
}
|
|
|
|
/**
 * Builds a short lowercase base-36 identifier: up to eight characters derived
 * from `Math.random()` followed by the current epoch milliseconds.
 *
 * The random part comes from `Math.random()`, so the result is NOT
 * cryptographically secure and uniqueness is best-effort only.
 */
function generateId(): string {
  // slice(2, 10) drops the leading "0." and keeps at most eight digits.
  const randomPart = Math.random().toString(36).slice(2, 10);
  const timePart = Date.now().toString(36);
  return randomPart + timePart;
}
|
|
|
|
/**
 * Reads a secret from the local Vault server.
 *
 * The Vault token is taken from the `root_token` field of
 * `/opt/vault/init-keys.json`; the secret is fetched with `curl` from
 * `https://127.0.0.1:8200/v1/secret/data/<path>` and the `data.data` object of
 * the JSON response is returned.
 *
 * @param path Secret path appended to `/v1/secret/data/`.
 * @returns The key/value payload found under `data.data` in the response.
 * @throws Error if the init-keys file has no usable `root_token`, or if the
 *         response does not contain a `data.data` object (for example when
 *         Vault answers with an `errors` body).
 */
async function getVaultSecret(path: string): Promise<Record<string, any>> {
  const initKeys = await Bun.file("/opt/vault/init-keys.json").json();
  const token = initKeys?.root_token;
  if (typeof token !== "string" || token.length === 0) {
    throw new Error("Vault init keys file does not contain a root_token");
  }

  // NOTE(review): `-k` disables TLS certificate verification and the token is
  // passed on the curl command line; both are kept as-is to preserve existing
  // behavior, but should be confirmed as acceptable for this deployment.
  const result = await $`curl -sk -H "X-Vault-Token: ${token}" https://127.0.0.1:8200/v1/secret/data/${path}`.json();

  const secret = result?.data?.data;
  if (secret === null || typeof secret !== "object") {
    // Surface Vault's own error messages when present instead of failing later
    // with an opaque "cannot read properties of undefined".
    const detail = Array.isArray(result?.errors) && result.errors.length > 0
      ? `: ${result.errors.join("; ")}`
      : "";
    throw new Error(`Vault returned no secret data for "${path}"${detail}`);
  }

  return secret;
}
|
|
|
|
// =============================================================================
// Blackboard System - Shared Memory for Agent Coordination
// =============================================================================
|
|
|
|
/**
 * Redis-backed shared blackboard scoped to a single task.
 *
 * Storage layout used by this class:
 * - `blackboard:<taskId>:<section>`  hash of entry key -> JSON entry
 * - `blackboard:<taskId>:history`    list of JSON write records
 * - `blackboard:<taskId>:votes:<id>` list of JSON votes per proposal
 * - `metrics:<taskId>`               hash holding read/write counters
 */
export class Blackboard {
  private redis!: RedisClientType;
  private taskId: string;
  // Version counters live in this instance only; they are not read back from
  // Redis, so versions are monotonic per Blackboard instance, not globally.
  private versionCounters: Map<string, number> = new Map();

  constructor(taskId: string) {
    this.taskId = taskId;
  }

  /** Loads credentials from the `services/dragonfly` secret and connects. */
  async connect(): Promise<void> {
    const creds = await getVaultSecret("services/dragonfly");
    this.redis = createClient({
      url: "redis://" + creds.host + ":" + creds.port,
      password: creds.password,
    });
    await this.redis.connect();
  }

  /** Closes the Redis connection; a no-op if `connect()` never created one. */
  async disconnect(): Promise<void> {
    if (!this.redis) return;
    await this.redis.quit();
  }

  /** Fully-qualified key of one entry; used to index the version counters. */
  private key(section: BlackboardSection, entryKey: string): string {
    return `blackboard:${this.taskId}:${section}:${entryKey}`;
  }

  /** Name of the Redis hash holding every entry of a section. */
  private sectionKey(section: BlackboardSection): string {
    return `blackboard:${this.taskId}:${section}`;
  }

  /**
   * Stores `value` under `section`/`entryKey`, appends a `WRITE` record to the
   * history list and increments the `blackboard_writes` metric.
   *
   * @returns The entry that was stored, including its version and timestamp.
   */
  async write(section: BlackboardSection, entryKey: string, value: any, author: AgentRole): Promise<BlackboardEntry> {
    const fullKey = this.key(section, entryKey);
    // Bump synchronously (before any await) so overlapping writes issued by
    // this instance never receive the same version number.
    const newVersion = (this.versionCounters.get(fullKey) ?? 0) + 1;
    this.versionCounters.set(fullKey, newVersion);

    const entry: BlackboardEntry = {
      section,
      key: entryKey,
      value,
      author,
      version: newVersion,
      timestamp: now(),
    };

    await this.redis.hSet(this.sectionKey(section), entryKey, JSON.stringify(entry));
    await this.redis.rPush(
      `blackboard:${this.taskId}:history`,
      JSON.stringify({ action: "WRITE", ...entry }),
    );

    // Increment metrics
    await this.redis.hIncrBy(`metrics:${this.taskId}`, "blackboard_writes", 1);

    return entry;
  }

  /**
   * Reads one entry. Returns `null` when the entry does not exist; only
   * successful reads increment the `blackboard_reads` metric.
   */
  async read(section: BlackboardSection, entryKey: string): Promise<BlackboardEntry | null> {
    const data = await this.redis.hGet(this.sectionKey(section), entryKey);
    if (!data) return null;

    await this.redis.hIncrBy(`metrics:${this.taskId}`, "blackboard_reads", 1);
    return JSON.parse(data);
  }

  /**
   * Reads every entry of a section and adds the number of entries returned to
   * the `blackboard_reads` metric.
   */
  async readSection(section: BlackboardSection): Promise<BlackboardEntry[]> {
    const data = await this.redis.hGetAll(this.sectionKey(section));
    const entries: BlackboardEntry[] = Object.values(data).map(raw => JSON.parse(raw));
    await this.redis.hIncrBy(`metrics:${this.taskId}`, "blackboard_reads", entries.length);
    return entries;
  }

  /**
   * Returns the most recent history records, oldest first.
   *
   * @param limit Maximum number of records to return. Values below 1 (or NaN)
   *              yield an empty array.
   */
  async getHistory(limit: number = 100): Promise<any[]> {
    const count = Math.floor(limit);
    // A start index of -0 is sent as 0, which would make LRANGE return the
    // entire list; a positive start (from a negative limit) would skip records.
    if (!(count > 0)) return [];

    const data = await this.redis.lRange(`blackboard:${this.taskId}:history`, -count, -1);
    return data.map(d => JSON.parse(d));
  }

  // Consensus tracking

  /**
   * Appends the vote to the proposal's vote list and mirrors it into the
   * `consensus` section under `<proposal_id>:<agent>`.
   */
  async recordVote(vote: ConsensusVote): Promise<void> {
    await this.redis.rPush(`blackboard:${this.taskId}:votes:${vote.proposal_id}`, JSON.stringify(vote));
    await this.write("consensus", vote.proposal_id + ":" + vote.agent, vote, vote.agent);
  }

  /** Returns every vote recorded for a proposal, in insertion order. */
  async getVotes(proposalId: string): Promise<ConsensusVote[]> {
    const data = await this.redis.lRange(`blackboard:${this.taskId}:votes:${proposalId}`, 0, -1);
    return data.map(d => JSON.parse(d));
  }

  /**
   * Evaluates whether a proposal has reached consensus.
   *
   * Consensus is reached when every agent in `requiredAgents` has cast a vote
   * (of any kind), at least one vote is `ACCEPT`, and no vote from any agent
   * is `REJECT`. Votes are never de-duplicated, so an earlier `REJECT` from an
   * agent still blocks consensus after that agent votes again.
   */
  async checkConsensus(proposalId: string, requiredAgents: AgentRole[]): Promise<{ reached: boolean; votes: ConsensusVote[] }> {
    const votes = await this.getVotes(proposalId);

    const hasAllRequired = requiredAgents.every(agent =>
      votes.some(v => v.agent === agent)
    );
    const hasAccept = votes.some(v => v.vote === "ACCEPT");
    const hasReject = votes.some(v => v.vote === "REJECT");

    // Same truth table as "accepts > rejects && rejects === 0".
    const reached = hasAllRequired && hasAccept && !hasReject;

    return { reached, votes };
  }
}
|
|
|
|
// =============================================================================
// Direct Messaging System - Point-to-Point Communication
// =============================================================================
|
|
|
|
/**
 * Redis pub/sub message bus for one agent within one task.
 *
 * Messages are published on `msg:<taskId>:<recipient>`; each bus subscribes to
 * its own role channel and to the `ALL` broadcast channel. A second Redis
 * connection (`subscriber`) is dedicated to the subscriptions.
 */
export class MessageBus {
  private redis!: RedisClientType;
  private subscriber!: RedisClientType;
  private taskId: string;
  private agentRole: AgentRole;
  private messageHandlers: Map<string, (msg: AgentMessage) => void> = new Map();

  constructor(taskId: string, agentRole: AgentRole) {
    this.taskId = taskId;
    this.agentRole = agentRole;
  }

  /**
   * Loads credentials from the `services/dragonfly` secret, opens both
   * connections and subscribes to the role channel and the broadcast channel.
   */
  async connect(): Promise<void> {
    const creds = await getVaultSecret("services/dragonfly");
    const config = {
      url: "redis://" + creds.host + ":" + creds.port,
      password: creds.password,
    };

    this.redis = createClient(config);
    this.subscriber = createClient(config);

    await this.redis.connect();
    await this.subscriber.connect();

    // Subscribe to direct messages and broadcast
    await this.subscriber.subscribe(`msg:${this.taskId}:${this.agentRole}`, (message) => {
      this.handleRawMessage(message);
    });
    await this.subscriber.subscribe(`msg:${this.taskId}:ALL`, (message) => {
      this.handleRawMessage(message);
    });
  }

  /**
   * Unsubscribes and closes both connections.
   *
   * Every step is attempted even if an earlier one fails, so a failing
   * `unsubscribe()` can no longer leave sockets open. The first error
   * encountered is rethrown once all steps have run. Connections that were
   * never created are skipped.
   */
  async disconnect(): Promise<void> {
    const failures: unknown[] = [];
    const attempt = async (step: () => Promise<unknown>): Promise<void> => {
      try {
        await step();
      } catch (e) {
        failures.push(e);
      }
    };

    if (this.subscriber) {
      await attempt(() => this.subscriber.unsubscribe());
      await attempt(() => this.subscriber.quit());
    }
    if (this.redis) {
      await attempt(() => this.redis.quit());
    }

    if (failures.length > 0) throw failures[0];
  }

  /**
   * Parses a raw pub/sub payload and dispatches it. Malformed JSON is logged
   * and dropped so it cannot throw out of the subscriber callback.
   */
  private handleRawMessage(raw: string): void {
    let parsed: AgentMessage;
    try {
      parsed = JSON.parse(raw);
    } catch (e) {
      console.error("Discarding malformed message:", e);
      return;
    }
    this.handleMessage(parsed);
  }

  /**
   * Records a received message and invokes every registered handler.
   *
   * NOTE(review): `send()` also appends to the log and bumps the same two
   * counters, so a delivered message is counted by sender and receiver alike;
   * confirm this double counting is intended.
   */
  private handleMessage(msg: AgentMessage): void {
    // Store in message log (fire-and-forget, errors ignored)
    this.redis.rPush(`msg:${this.taskId}:log`, JSON.stringify(msg)).catch(() => {});
    this.redis.hIncrBy(`metrics:${this.taskId}`, "total_messages", 1).catch(() => {});
    this.redis.hIncrBy(`metrics:${this.taskId}`, "direct_messages", 1).catch(() => {});

    // Call registered handlers
    for (const handler of this.messageHandlers.values()) {
      try {
        const outcome: unknown = handler(msg);
        // An async handler returns a promise; observe its rejection so it is
        // reported the same way as a synchronous throw.
        if (outcome instanceof Promise) {
          outcome.catch((e: unknown) => console.error("Message handler error:", e));
        }
      } catch (e) {
        console.error("Message handler error:", e);
      }
    }
  }

  /** Registers (or replaces) the handler stored under `handlerId`. */
  onMessage(handlerId: string, handler: (msg: AgentMessage) => void): void {
    this.messageHandlers.set(handlerId, handler);
  }

  /** Removes the handler stored under `handlerId`, if any. */
  removeHandler(handlerId: string): void {
    this.messageHandlers.delete(handlerId);
  }

  /**
   * Publishes a message to one agent or to `ALL`, appends it to the message
   * log and increments the `total_messages` and `direct_messages` metrics.
   *
   * @returns The message that was published, including its generated id.
   */
  async send(to: AgentRole | "ALL", type: AgentMessage["type"], payload: any, correlationId?: string): Promise<AgentMessage> {
    const msg: AgentMessage = {
      id: generateId(),
      from: this.agentRole,
      to,
      type,
      payload,
      timestamp: now(),
      correlation_id: correlationId,
    };
    const serialized = JSON.stringify(msg);

    await this.redis.publish(`msg:${this.taskId}:${to}`, serialized);
    await this.redis.rPush(`msg:${this.taskId}:log`, serialized);
    await this.redis.hIncrBy(`metrics:${this.taskId}`, "total_messages", 1);
    await this.redis.hIncrBy(`metrics:${this.taskId}`, "direct_messages", 1);

    return msg;
  }

  /**
   * Returns the most recent logged messages, oldest first.
   *
   * @param limit Maximum number of messages to return. Values below 1 (or NaN)
   *              yield an empty array.
   */
  async getMessageLog(limit: number = 100): Promise<AgentMessage[]> {
    const count = Math.floor(limit);
    // A start index of -0 is sent as 0, which would make LRANGE return the
    // entire log instead of nothing.
    if (!(count > 0)) return [];

    const data = await this.redis.lRange(`msg:${this.taskId}:log`, -count, -1);
    return data.map(d => JSON.parse(d));
  }
}
|
|
|
|
// =============================================================================
// Agent State Manager
// =============================================================================
|
|
|
|
/**
 * Persists per-agent state for one task in the Redis hash `agents:<taskId>`
 * (field = agent role, value = JSON-encoded state) and offers helpers for
 * spotting blocked or inactive agents.
 */
export class AgentStateManager {
  private redis!: RedisClientType;
  private taskId: string;

  constructor(taskId: string) {
    this.taskId = taskId;
  }

  /** Loads credentials from the `services/dragonfly` secret and connects. */
  async connect(): Promise<void> {
    const creds = await getVaultSecret("services/dragonfly");
    this.redis = createClient({
      url: "redis://" + creds.host + ":" + creds.port,
      password: creds.password,
    });
    await this.redis.connect();
  }

  /** Closes the Redis connection; a no-op if `connect()` never created one. */
  async disconnect(): Promise<void> {
    if (!this.redis) return;
    await this.redis.quit();
  }

  /**
   * Stamps `state.last_activity` with the current time and stores the state
   * under its role. Note that the caller's `state` object is mutated.
   */
  async updateState(state: AgentState): Promise<void> {
    state.last_activity = now();
    await this.redis.hSet(`agents:${this.taskId}`, state.role, JSON.stringify(state));
  }

  /** Returns the stored state for `role`, or `null` when none exists. */
  async getState(role: AgentRole): Promise<AgentState | null> {
    const data = await this.redis.hGet(`agents:${this.taskId}`, role);
    return data ? JSON.parse(data) : null;
  }

  /** Returns the stored state of every agent of this task. */
  async getAllStates(): Promise<AgentState[]> {
    const data = await this.redis.hGetAll(`agents:${this.taskId}`);
    return Object.values(data).map(d => JSON.parse(d));
  }

  /**
   * Tells whether `role` has been blocked for longer than `thresholdSeconds`.
   * Returns `false` when the agent has no stored state or no `blocked_since`.
   */
  async isBlocked(role: AgentRole, thresholdSeconds: number): Promise<boolean> {
    const state = await this.getState(role);
    if (!state || !state.blocked_since) return false;

    const blockedSeconds = (Date.now() - new Date(state.blocked_since).getTime()) / 1000;
    return blockedSeconds > thresholdSeconds;
  }

  /**
   * Lists the roles of agents whose `last_activity` is older than
   * `thresholdSeconds`. Agents with status `COMPLETED` or `FAILED` are
   * ignored. An unparseable `last_activity` produces NaN, which never exceeds
   * the threshold, so such agents are not reported.
   */
  async detectStuckAgents(thresholdSeconds: number): Promise<AgentRole[]> {
    const states = await this.getAllStates();
    const checkedAt = Date.now();

    return states
      .filter(state => state.status !== "COMPLETED" && state.status !== "FAILED")
      .filter(state => {
        const idleSeconds = (checkedAt - new Date(state.last_activity).getTime()) / 1000;
        return idleSeconds > thresholdSeconds;
      })
      .map(state => state.role);
  }
}
|
|
|
|
// =============================================================================
// Spawn Controller - Manages Conditional Agent Spawning
// =============================================================================
|
|
|
|
/**
 * Tracks the conditions under which the GAMMA agent should be spawned for one
 * task and remembers whether it has already been spawned.
 *
 * Condition snapshots are written to the hash `spawn:<taskId>:conditions`;
 * spawn details are written to the hash `metrics:<taskId>`.
 */
export class SpawnController {
  private redis!: RedisClientType;
  private taskId: string;
  // Empty until connect() installs the default conditions.
  private conditions: SpawnCondition[] = [];
  private gammaSpawned: boolean = false;

  constructor(taskId: string) {
    this.taskId = taskId;
  }

  /** Builds a fresh copy of the default spawn conditions, all untriggered. */
  private static defaultConditions(): SpawnCondition[] {
    return [
      {
        type: "STUCK",
        threshold: 30, // seconds
        current_value: 0,
        triggered: false,
        description: "Spawn GAMMA when agents are stuck for 30+ seconds",
      },
      {
        type: "CONFLICT",
        threshold: 3, // conflicts
        current_value: 0,
        triggered: false,
        description: "Spawn GAMMA when 3+ unresolved conflicts detected",
      },
      {
        type: "COMPLEXITY",
        threshold: 0.8, // complexity score
        current_value: 0,
        triggered: false,
        description: "Spawn GAMMA when task complexity exceeds 0.8",
      },
      {
        type: "SUCCESS",
        threshold: 1.0, // completion
        current_value: 0,
        triggered: false,
        description: "Spawn GAMMA to validate and finalize when task complete",
      },
    ];
  }

  /**
   * Loads credentials from the `services/dragonfly` secret, connects, and
   * resets the in-memory conditions to their defaults.
   */
  async connect(): Promise<void> {
    const creds = await getVaultSecret("services/dragonfly");
    this.redis = createClient({
      url: "redis://" + creds.host + ":" + creds.port,
      password: creds.password,
    });
    await this.redis.connect();

    // Initialize default spawn conditions
    this.conditions = SpawnController.defaultConditions();
  }

  /** Closes the Redis connection; a no-op if `connect()` never created one. */
  async disconnect(): Promise<void> {
    if (!this.redis) return;
    await this.redis.quit();
  }

  /**
   * Records a new value for the condition of the given type, marks it as
   * triggered when `value >= threshold`, and persists the snapshot.
   *
   * @returns The updated condition, or `null` if no condition has that type.
   */
  async updateCondition(type: SpawnCondition["type"], value: number): Promise<SpawnCondition | null> {
    const condition = this.conditions.find(c => c.type === type);
    if (!condition) return null;

    condition.current_value = value;
    condition.triggered = value >= condition.threshold;

    await this.redis.hSet(`spawn:${this.taskId}:conditions`, type, JSON.stringify(condition));

    return condition;
  }

  /**
   * Reports whether GAMMA should be spawned now. Once GAMMA has been marked as
   * spawned this always answers `false`; otherwise the first triggered
   * condition (in list order) is returned as the reason.
   */
  async checkSpawnConditions(): Promise<{ shouldSpawn: boolean; reason: SpawnCondition | null }> {
    if (this.gammaSpawned) {
      return { shouldSpawn: false, reason: null };
    }

    const reason = this.conditions.find(c => c.triggered) ?? null;
    return { shouldSpawn: reason !== null, reason };
  }

  /**
   * Marks GAMMA as spawned and records the reason and time in the metrics
   * hash. The three fields are written in a single HSET so readers never see
   * a partially updated record.
   */
  async markGammaSpawned(reason: SpawnCondition): Promise<void> {
    // Set the flag first so a failing metrics write cannot cause a re-spawn.
    this.gammaSpawned = true;
    await this.redis.hSet(`metrics:${this.taskId}`, {
      gamma_spawned: "true",
      gamma_spawn_reason: reason.type,
      gamma_spawn_time: now(),
    });
  }

  /** Whether `markGammaSpawned()` has been called on this instance. */
  isGammaSpawned(): boolean {
    return this.gammaSpawned;
  }

  /** Returns the live condition list (not a copy). */
  getConditions(): SpawnCondition[] {
    return this.conditions;
  }
}
|
|
|
|
// =============================================================================
// Metrics Collector
// =============================================================================
|
|
|
|
/**
 * Reads and writes the coordination metrics of one task, stored as string
 * fields in the Redis hash `metrics:<taskId>`.
 */
export class MetricsCollector {
  private redis!: RedisClientType;
  private taskId: string;
  // Captured at construction; finalize() measures elapsed time from here.
  private startTime: number;

  constructor(taskId: string) {
    this.taskId = taskId;
    this.startTime = Date.now();
  }

  /** Parses a stored counter as a base-10 integer, treating absence as 0. */
  private static toInt(raw: string | undefined): number {
    return parseInt(raw || "0", 10);
  }

  /**
   * Loads credentials from the `services/dragonfly` secret, connects, and
   * writes the initial metric values.
   *
   * NOTE(review): the initial write overwrites every counter with "0", so
   * connecting a second collector for the same task resets counts already
   * recorded; confirm only one collector connects per task.
   */
  async connect(): Promise<void> {
    const creds = await getVaultSecret("services/dragonfly");
    this.redis = createClient({
      url: "redis://" + creds.host + ":" + creds.port,
      password: creds.password,
    });
    await this.redis.connect();

    await this.redis.hSet(`metrics:${this.taskId}`, {
      task_id: this.taskId,
      start_time: now(),
      total_messages: "0",
      direct_messages: "0",
      blackboard_writes: "0",
      blackboard_reads: "0",
      conflicts_detected: "0",
      conflicts_resolved: "0",
      gamma_spawned: "false",
      final_consensus: "false",
      performance_score: "0",
    });
  }

  /** Closes the Redis connection; a no-op if `connect()` never created one. */
  async disconnect(): Promise<void> {
    if (!this.redis) return;
    await this.redis.quit();
  }

  /** Adds `by` (default 1) to the named metric. */
  async increment(metric: string, by: number = 1): Promise<void> {
    await this.redis.hIncrBy(`metrics:${this.taskId}`, metric, by);
  }

  /** Overwrites the named metric with `value`. */
  async set(metric: string, value: string): Promise<void> {
    await this.redis.hSet(`metrics:${this.taskId}`, metric, value);
  }

  /**
   * Reads the metrics hash and converts it to typed values. Missing counters
   * read as 0, missing flags as `false`, and a missing `start_time` falls
   * back to the current time.
   */
  async getMetrics(): Promise<CoordinationMetrics> {
    const data = await this.redis.hGetAll(`metrics:${this.taskId}`);
    const toInt = MetricsCollector.toInt;

    return {
      task_id: this.taskId,
      start_time: data.start_time || now(),
      end_time: data.end_time,
      total_messages: toInt(data.total_messages),
      direct_messages: toInt(data.direct_messages),
      blackboard_writes: toInt(data.blackboard_writes),
      blackboard_reads: toInt(data.blackboard_reads),
      conflicts_detected: toInt(data.conflicts_detected),
      conflicts_resolved: toInt(data.conflicts_resolved),
      gamma_spawned: data.gamma_spawned === "true",
      gamma_spawn_reason: data.gamma_spawn_reason,
      gamma_spawn_time: data.gamma_spawn_time,
      final_consensus: data.final_consensus === "true",
      performance_score: parseFloat(data.performance_score || "0"),
    };
  }

  /**
   * Records the end time and consensus outcome, computes the performance
   * score, stores it with three decimals, and returns the final metrics.
   *
   * Score, clamped to [0, 1]:
   *   0.5 + 0.2 * messageEfficiency - conflictPenalty - timePenalty
   *       + consensusBonus + gammaBonus
   * where
   * - messageEfficiency = max(0, 1 - total_messages / 100)
   * - conflictPenalty   = 0.1 per detected conflict
   * - timePenalty       = min(0.5, elapsed ms / 120000)
   * - consensusBonus    = 0.2 when consensus was reached
   * - gammaBonus        = +0.1 if GAMMA spawned and consensus was reached,
   *                       -0.1 if GAMMA spawned without consensus, else 0
   */
  async finalize(consensus: boolean): Promise<CoordinationMetrics> {
    const endTime = now();
    const elapsedMs = Date.now() - this.startTime;

    await this.redis.hSet(`metrics:${this.taskId}`, "end_time", endTime);
    await this.redis.hSet(`metrics:${this.taskId}`, "final_consensus", consensus ? "true" : "false");

    // Calculate performance score
    const metrics = await this.getMetrics();
    const messageEfficiency = Math.max(0, 1 - (metrics.total_messages / 100));
    const conflictPenalty = metrics.conflicts_detected * 0.1;
    const timePenalty = Math.min(0.5, elapsedMs / 120000);
    const consensusBonus = consensus ? 0.2 : 0;

    let gammaBonus = 0;
    if (metrics.gamma_spawned) {
      gammaBonus = consensus ? 0.1 : -0.1;
    }

    const rawScore =
      0.5 + messageEfficiency * 0.2 - conflictPenalty - timePenalty + consensusBonus + gammaBonus;
    const score = Math.max(0, Math.min(1, rawScore));

    await this.redis.hSet(`metrics:${this.taskId}`, "performance_score", score.toFixed(3));

    return this.getMetrics();
  }
}
|