// Sophisticated multi-LLM integration with intelligent routing
export class LLMOrchestrator {
  private models: Map<string, LLMProvider> = new Map();
  private routingEngine: LLMRoutingEngine;
  private responseOptimizer: ResponseOptimizer;
  private qualityAssurance: ResponseQualityAssurance;
  private costOptimizer: CostOptimizer;

  constructor(private config: LLMConfig) {
    this.routingEngine = new LLMRoutingEngine(config.routing);
    this.responseOptimizer = new ResponseOptimizer(config.optimization);
    this.qualityAssurance = new ResponseQualityAssurance(config.quality);
    this.costOptimizer = new CostOptimizer(config.cost);
    this.initializeModels();
  }
  private async initializeModels() {
    // Initialize GPT-4 for complex analysis
    this.models.set('gpt4', new OpenAIProvider({
      model: 'gpt-4-turbo-preview',
      apiKey: this.config.openai.apiKey,
      organization: this.config.openai.organization,
      capabilities: {
        strengths: ['complex_analysis', 'detailed_explanations', 'mathematical_reasoning'],
        maxTokens: 4096,
        costPerToken: 0.00003,
        latency: 'medium'
      }
    }));

    // Initialize Claude for technical accuracy
    this.models.set('claude', new AnthropicProvider({
      model: 'claude-3-sonnet-20240229',
      apiKey: this.config.anthropic.apiKey,
      capabilities: {
        strengths: ['technical_accuracy', 'safety', 'structured_responses'],
        maxTokens: 4096,
        costPerToken: 0.000015,
        latency: 'low'
      }
    }));

    // Initialize Mistral for speed and efficiency
    this.models.set('mistral', new MistralProvider({
      model: 'mistral-large-latest',
      apiKey: this.config.mistral.apiKey,
      capabilities: {
        strengths: ['speed', 'efficiency', 'code_generation'],
        maxTokens: 8192,
        costPerToken: 0.000008,
        latency: 'very_low'
      }
    }));

    // Local model for privacy-sensitive queries
    this.models.set('local', new LocalLLMProvider({
      model: 'crypto-analyst-7b',
      modelPath: './models/crypto-analyst-7b',
      capabilities: {
        strengths: ['privacy', 'domain_specific', 'low_cost'],
        maxTokens: 2048,
        costPerToken: 0,
        latency: 'low'
      }
    }));
  }
  async generateResponse(
    prompt: string,
    context: ConversationContext,
    requirements: ResponseRequirements
  ): Promise<LLMResponse> {
    try {
      // Determine optimal model for this request
      const selectedModel = await this.routingEngine.selectModel(prompt, context, requirements);

      // Optimize prompt for selected model
      const optimizedPrompt = await this.responseOptimizer.optimizePrompt(
        prompt,
        selectedModel,
        context
      );

      // Generate response
      const response = await this.generateWithModel(selectedModel, optimizedPrompt, context);

      // Quality assurance check
      const qualityCheck = await this.qualityAssurance.validateResponse(response, requirements);

      if (!qualityCheck.isAcceptable && qualityCheck.suggestedFallback) {
        // Try fallback model
        const fallbackResponse = await this.generateWithModel(
          qualityCheck.suggestedFallback,
          optimizedPrompt,
          context
        );
        return this.selectBestResponse([response, fallbackResponse], requirements);
      }

      return response;
    } catch (error) {
      logger.error('LLM response generation failed:', error);
      // Generate fallback response
      return this.generateFallbackResponse(prompt, context, error);
    }
  }
  private async generateWithModel(
    modelName: string,
    prompt: string,
    context: ConversationContext
  ): Promise<LLMResponse> {
    const provider = this.models.get(modelName);
    if (!provider) {
      throw new Error(`Model not available: ${modelName}`);
    }

    const startTime = Date.now();

    try {
      const completion = await provider.complete({
        prompt,
        context: context.conversationHistory,
        temperature: this.getOptimalTemperature(modelName, context),
        maxTokens: this.getOptimalMaxTokens(modelName, context),
        systemPrompt: this.buildSystemPrompt(modelName, context)
      });

      const responseTime = Date.now() - startTime;

      return {
        content: completion.content,
        model: modelName,
        confidence: completion.confidence || 0.8,
        responseTime,
        tokenUsage: completion.usage,
        cost: this.calculateCost(modelName, completion.usage),
        metadata: {
          temperature: completion.temperature,
          finishReason: completion.finishReason,
          safetyScores: completion.safetyScores
        }
      };
    } catch (error) {
      logger.error(`Model ${modelName} generation failed:`, error);
      throw error;
    }
  }
  private buildSystemPrompt(modelName: string, context: ConversationContext): string {
    const basePrompt = `You are Kaizen AI, an expert cryptocurrency analyst and security researcher. Your role is to help users understand crypto projects, assess risks, and make informed decisions.
Core principles:
- Provide accurate, data-driven analysis
- Always include confidence levels and caveats
- Explain complex concepts clearly
- Prioritize user safety and security
- Be transparent about limitations
Current context:
- User experience level: ${context.userProfile?.experienceLevel || 'unknown'}
- Risk tolerance: ${context.userProfile?.riskTolerance || 'unknown'}
- Previous conversation topics: ${context.topics.slice(-3).join(', ')}`;

    // Model-specific adjustments
    const modelAdjustments: Record<string, string> = {
      'gpt4': '\n\nUse your advanced reasoning capabilities for complex multi-step analysis. Provide detailed explanations and consider edge cases.',
      'claude': '\n\nFocus on technical accuracy and safety. Structure your responses clearly with proper sections and bullet points when appropriate.',
      'mistral': '\n\nBe concise and efficient while maintaining accuracy. Prioritize direct answers with essential details.',
      'local': '\n\nLeverage your crypto domain expertise. Use technical terminology appropriately for the user\'s experience level.'
    };

    return basePrompt + (modelAdjustments[modelName] || '');
  }
  async *streamResponse(
    prompt: string,
    context: ConversationContext,
    requirements: ResponseRequirements
  ): AsyncGenerator<StreamChunk> {
    try {
      const selectedModel = await this.routingEngine.selectModel(prompt, context, requirements);
      const provider = this.models.get(selectedModel);

      if (!provider?.supportsStreaming) {
        // Fall back to non-streaming generation
        const response = await this.generateResponse(prompt, context, requirements);
        yield { type: 'content', content: response.content, isComplete: true };
        return;
      }

      const optimizedPrompt = await this.responseOptimizer.optimizePrompt(
        prompt,
        selectedModel,
        context
      );

      const stream = provider.streamComplete({
        prompt: optimizedPrompt,
        context: context.conversationHistory,
        temperature: this.getOptimalTemperature(selectedModel, context),
        systemPrompt: this.buildSystemPrompt(selectedModel, context)
      });

      let accumulatedContent = '';
      let lastQualityCheckAt = 0;

      for await (const chunk of stream) {
        accumulatedContent += chunk.content;

        yield {
          type: 'content',
          content: chunk.content,
          isComplete: false,
          metadata: {
            model: selectedModel,
            tokenCount: chunk.tokenCount
          }
        };

        // Periodic quality checks during streaming (roughly every 500 characters)
        if (accumulatedContent.length - lastQualityCheckAt >= 500) {
          lastQualityCheckAt = accumulatedContent.length;
          const qualityCheck = await this.qualityAssurance.validatePartialResponse(
            accumulatedContent,
            requirements
          );

          if (qualityCheck.shouldStop) {
            yield {
              type: 'error',
              content: 'Response quality degraded - stopping generation',
              isComplete: true
            };
            return;
          }
        }
      }

      yield {
        type: 'complete',
        content: '',
        isComplete: true,
        metadata: {
          model: selectedModel,
          totalTokens: accumulatedContent.length // character count used as a rough token proxy
        }
      };
    } catch (error) {
      logger.error('Streaming response failed:', error);
      yield {
        type: 'error',
        content: 'Failed to generate streaming response',
        isComplete: true,
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }
}
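
// Usage sketch (illustrative; not part of the original source). It shows how the
// orchestrator above might be driven. The prompts are placeholders, and the exact
// shapes of LLMConfig, ConversationContext, and ResponseRequirements are assumed
// from how this file uses them.
async function exampleOrchestratorUsage(
  config: LLMConfig,
  context: ConversationContext,
  requirements: ResponseRequirements
): Promise<void> {
  const orchestrator = new LLMOrchestrator(config);

  // One-shot generation: routing, prompt optimization, and QA happen internally
  const response = await orchestrator.generateResponse(
    'Assess the tokenomics and contract risk of this project',
    context,
    requirements
  );
  console.log(`${response.model} responded in ${response.responseTime}ms:`, response.content);

  // Streaming generation: consume chunks as they arrive
  for await (const chunk of orchestrator.streamResponse('Summarize the key risk factors', context, requirements)) {
    if (chunk.type === 'content') {
      process.stdout.write(chunk.content);
    }
  }
}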
// LLM routing engine for optimal model selection
export class LLMRoutingEngine {
  constructor(private config: RoutingConfig) {}

  async selectModel(
    prompt: string,
    context: ConversationContext,
    requirements: ResponseRequirements
  ): Promise<string> {
    // Analyze prompt characteristics
    const promptAnalysis = await this.analyzePrompt(prompt);

    // Consider context factors
    const contextFactors = this.analyzeContext(context);

    // Apply routing rules
    const routingScore = this.calculateRoutingScores(promptAnalysis, contextFactors, requirements);

    // Select best model
    const selectedModel = this.selectBestModel(routingScore);

    logger.debug('Model routing decision', {
      prompt: prompt.substring(0, 100),
      selectedModel,
      scores: routingScore,
      factors: { promptAnalysis, contextFactors, requirements }
    });

    return selectedModel;
  }
  private async analyzePrompt(prompt: string): Promise<PromptAnalysis> {
    return {
      complexity: this.calculateComplexity(prompt),
      length: prompt.length,
      requiresReasoning: this.requiresReasoning(prompt),
      requiresAccuracy: this.requiresAccuracy(prompt),
      requiresSpeed: this.requiresSpeed(prompt),
      requiresPrivacy: this.requiresPrivacy(prompt),
      domain: this.identifyDomain(prompt),
      urgency: this.assessUrgency(prompt)
    };
  }
  private calculateComplexity(prompt: string): number {
    let complexity = 0;

    // Multi-step reasoning indicators
    const reasoningIndicators = ['analyze', 'compare', 'evaluate', 'calculate', 'determine'];
    complexity += reasoningIndicators.filter(indicator =>
      prompt.toLowerCase().includes(indicator)
    ).length * 0.2;

    // Technical depth indicators
    const technicalIndicators = ['smart contract', 'blockchain', 'defi', 'tokenomics'];
    complexity += technicalIndicators.filter(indicator =>
      prompt.toLowerCase().includes(indicator)
    ).length * 0.15;

    // Question complexity
    const questionCount = (prompt.match(/\?/g) || []).length;
    complexity += Math.min(questionCount * 0.1, 0.3);

    // Conditional logic indicators
    const conditionalIndicators = ['if', 'when', 'unless', 'provided that'];
    complexity += conditionalIndicators.filter(indicator =>
      prompt.toLowerCase().includes(indicator)
    ).length * 0.1;

    return Math.min(complexity, 1.0);
  }
  private selectBestModel(scores: ModelScores): string {
    const models = Object.entries(scores);
    models.sort(([, a], [, b]) => b - a);

    const bestModel = models[0][0];
    const bestScore = models[0][1];

    // Ensure minimum threshold is met
    if (bestScore < this.config.minSelectionThreshold) {
      return this.config.defaultModel || 'claude';
    }

    return bestModel;
  }
  private calculateRoutingScores(
    promptAnalysis: PromptAnalysis,
    contextFactors: ContextFactors,
    requirements: ResponseRequirements
  ): ModelScores {
    const scores: ModelScores = {
      'gpt4': 0,
      'claude': 0,
      'mistral': 0,
      'local': 0
    };

    // GPT-4 scoring
    scores.gpt4 += promptAnalysis.complexity * 0.4;
    scores.gpt4 += promptAnalysis.requiresReasoning ? 0.3 : 0;
    scores.gpt4 += contextFactors.isComplexConversation ? 0.2 : 0;
    scores.gpt4 -= requirements.prioritizeSpeed ? 0.3 : 0;
    scores.gpt4 -= requirements.costSensitive ? 0.4 : 0;

    // Claude scoring
    scores.claude += promptAnalysis.requiresAccuracy ? 0.4 : 0;
    scores.claude += contextFactors.requiresSafety ? 0.3 : 0;
    scores.claude += requirements.prioritizeAccuracy ? 0.3 : 0;
    scores.claude += promptAnalysis.domain === 'technical' ? 0.2 : 0;

    // Mistral scoring
    scores.mistral += requirements.prioritizeSpeed ? 0.5 : 0;
    scores.mistral += requirements.costSensitive ? 0.3 : 0;
    scores.mistral += promptAnalysis.length < 500 ? 0.2 : 0;
    scores.mistral -= promptAnalysis.complexity > 0.7 ? 0.4 : 0;

    // Local model scoring
    scores.local += promptAnalysis.requiresPrivacy ? 0.6 : 0;
    scores.local += requirements.costSensitive ? 0.4 : 0;
    scores.local += promptAnalysis.domain === 'crypto' ? 0.3 : 0;
    scores.local -= promptAnalysis.complexity > 0.8 ? 0.5 : 0;

    // Normalize scores
    const maxScore = Math.max(...Object.values(scores));
    if (maxScore > 0) {
      for (const model in scores) {
        scores[model] = scores[model] / maxScore;
      }
    }

    return scores;
  }
}
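
// Supporting shapes assumed by the routing engine above. These declarations are
// not part of the original excerpt (the real definitions presumably live elsewhere
// in the codebase); the field names below are inferred purely from how
// promptAnalysis, contextFactors, and the score map are used in this file.
export interface PromptAnalysis {
  complexity: number;        // 0-1 score from calculateComplexity()
  length: number;            // prompt length in characters
  requiresReasoning: boolean;
  requiresAccuracy: boolean;
  requiresSpeed: boolean;
  requiresPrivacy: boolean;
  domain: string;            // e.g. 'technical' or 'crypto'
  urgency: string;
}

export interface ContextFactors {
  isComplexConversation: boolean;
  requiresSafety: boolean;
}

// Keys correspond to the model names registered in LLMOrchestrator.initializeModels()
export type ModelScores = Record<string, number>;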