// KnowledgeBridge / server / nebius-client.ts — initial commit 7c012de (fazeel007)
/**
* Nebius AI Client for Advanced LLM and Embedding Capabilities
*/
/**
 * Connection settings held by NebiusClient.
 */
interface NebiusConfig {
// Sent as the `Authorization: Bearer …` credential on every request; an empty
// string means "not configured" and makes requests fail before they are sent.
apiKey: string;
// Prefix prepended to every endpoint path (e.g. `/embeddings`).
baseUrl: string;
}
/**
 * Input accepted by NebiusClient.createEmbeddings.
 */
interface EmbeddingRequest {
// A single text or a batch of texts to embed.
input: string | string[];
// NOTE(review): createEmbeddings currently ignores this field and always
// sends its own pinned model name.
model?: string;
}
/**
 * Result returned by NebiusClient.createEmbeddings, either the parsed API
 * response or the locally generated mock fallback.
 * Presumably mirrors the OpenAI-style embeddings payload — confirm against the
 * Nebius API reference.
 */
interface EmbeddingResponse {
// One entry per embedded input.
data: Array<{
// The embedding vector.
embedding: number[];
// Position of the entry; the mock fallback starts at 0.
index: number;
}>;
// Name of the model that produced the vectors ('mock-embedding-model' for the fallback).
model: string;
// Token accounting; the mock fallback fills both fields with a whitespace-split word count.
usage: {
prompt_tokens: number;
total_tokens: number;
};
}
/**
 * Input accepted by NebiusClient.createChatCompletion.
 */
interface ChatCompletionRequest {
// Model identifier forwarded to the `/chat/completions` endpoint.
model: string;
// Conversation so far, in order.
messages: Array<{
role: 'system' | 'user' | 'assistant';
content: string;
}>;
// createChatCompletion falls back to 0.7 when this is omitted.
temperature?: number;
// createChatCompletion falls back to 1000 when this is omitted.
max_tokens?: number;
// createChatCompletion falls back to false when this is omitted.
stream?: boolean;
}
/**
 * Input accepted by NebiusClient.analyzeDocument.
 */
interface DocumentAnalysisRequest {
// Document text; it is embedded verbatim in the user prompt.
content: string;
// Selects which system prompt is used for the analysis.
analysisType: 'summary' | 'classification' | 'key_points' | 'quality_score';
// Only an explicit `false` switches to plain-text output; omitted or `true`
// asks the model for markdown.
useMarkdown?: boolean;
// Opaque caller data, returned unchanged alongside the analysis.
metadata?: Record<string, any>;
}
/**
 * Client for the Nebius AI Studio HTTP API: embeddings, chat completions and
 * a few LLM-backed helpers (document analysis, query enhancement, relevance
 * scoring, research synthesis).
 *
 * All chat-based helpers use the DeepSeek R1 model, whose output may start
 * with a `<think>…</think>` reasoning block; that block is stripped before the
 * text is returned or parsed.
 */
class NebiusClient {
  private config: NebiusConfig;

  /**
   * Reads the API key from the `NEBIUS_API_KEY` environment variable.
   * A missing key only logs a warning here; requests fail later, when made.
   */
  constructor() {
    this.config = {
      apiKey: process.env.NEBIUS_API_KEY || '',
      baseUrl: 'https://api.studio.nebius.ai/v1'
    };
    if (!this.config.apiKey) {
      console.warn('Warning: NEBIUS_API_KEY not configured. AI features will not work.');
    }
  }

  /**
   * Send an authenticated JSON request and return the parsed JSON body.
   *
   * @param endpoint Path appended to the configured base URL (e.g. `/embeddings`).
   * @param options Extra `fetch` options; caller-supplied headers override the defaults.
   * @throws Error when no API key is configured or the response status is not OK
   *   (the message then carries the status code and the response body text).
   */
  private async makeRequest(endpoint: string, options: RequestInit = {}) {
    const url = `${this.config.baseUrl}${endpoint}`;
    if (!this.config.apiKey) {
      throw new Error('Nebius API key is not configured');
    }
    const response = await fetch(url, {
      ...options,
      headers: {
        'Authorization': `Bearer ${this.config.apiKey}`,
        'Content-Type': 'application/json',
        ...options.headers,
      },
    });
    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Nebius API request failed: ${response.status} - ${error}`);
    }
    return response.json();
  }

  /**
   * Remove DeepSeek R1 `<think>…</think>` reasoning blocks (and the whitespace
   * that follows them) from model output.
   */
  private stripThinking(text: string): string {
    return text.replace(/<think>[\s\S]*?<\/think>\s*/g, '');
  }

  /**
   * Pull the first choice's message text out of a chat completion response.
   *
   * @returns The text, or `undefined` when the response has no string content.
   */
  private extractMessageContent(response: unknown): string | undefined {
    const shaped = response as
      | { choices?: Array<{ message?: { content?: unknown } }> }
      | null
      | undefined;
    const content = shaped?.choices?.[0]?.message?.content;
    return typeof content === 'string' ? content : undefined;
  }

  /**
   * Parse the JSON object contained in a chat completion response.
   *
   * The reasoning block is stripped first, then everything between the first
   * `{` and the last `}` is parsed, so Markdown code fences or prose around
   * the object do not break parsing.
   *
   * @param response Raw chat completion response.
   * @param fallback Value returned when no JSON object can be extracted.
   */
  private parseJsonResponse<T extends object>(response: unknown, fallback: T): T {
    const content = this.extractMessageContent(response);
    if (content === undefined) {
      return fallback;
    }
    const cleaned = this.stripThinking(content);
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    if (start === -1 || end <= start) {
      return fallback;
    }
    try {
      const parsed: unknown = JSON.parse(cleaned.slice(start, end + 1));
      if (typeof parsed !== 'object' || parsed === null) {
        return fallback;
      }
      // NOTE(review): the parsed object is trusted to have the expected fields;
      // it is not validated against a schema.
      return parsed as T;
    } catch {
      return fallback;
    }
  }

  /**
   * Generate embeddings using Nebius models
   * Supported models: BAAI/bge-en-icl, BAAI/bge-multilingual-gemma2, intfloat/e5-mistral-7b-instruct
   *
   * `request.model` is currently ignored: the model is pinned to the one noted
   * as verified working. If the request fails for any reason, a deterministic
   * mock response is returned instead of throwing, with one mock embedding per
   * input text so `data` always lines up with the inputs.
   *
   * @param request Text(s) to embed.
   * @returns The API response, or a mock response with model `mock-embedding-model`.
   */
  async createEmbeddings(request: EmbeddingRequest): Promise<EmbeddingResponse> {
    // Use the working model we verified
    const workingModel = 'BAAI/bge-en-icl';
    try {
      console.log(`Using Nebius embedding model: ${workingModel}`);
      const response = await this.makeRequest('/embeddings', {
        method: 'POST',
        body: JSON.stringify({
          input: request.input,
          model: workingModel
        })
      });
      console.log(`✅ Embeddings successful with ${workingModel}`);
      return response;
    } catch (error) {
      console.log(`❌ Embedding model ${workingModel} failed:`, error instanceof Error ? error.message : String(error));
      // If the main model fails, create a mock response for demonstration
      console.warn('Nebius embedding failed, creating mock response');
      const inputs = Array.isArray(request.input) ? request.input : [request.input];
      // Rough token estimate: whitespace-separated words across all inputs.
      const tokenCount = inputs.reduce((sum, text) => sum + text.split(' ').length, 0);
      return {
        data: inputs.map((text, index) => ({
          embedding: this.generateMockEmbedding(text),
          index
        })),
        model: 'mock-embedding-model',
        usage: {
          prompt_tokens: tokenCount,
          total_tokens: tokenCount
        }
      };
    }
  }

  /**
   * Generate a mock embedding for demonstration purposes
   *
   * Produces a 1536-dimensional vector: position `i` is derived from the char
   * code of `text[i]` (positions past the end of the text stay 0), then the
   * vector is scaled to unit length. An empty text yields an all-zero vector.
   */
  private generateMockEmbedding(text: string): number[] {
    const embedding = new Array<number>(1536).fill(0);
    for (let i = 0; i < text.length && i < embedding.length; i++) {
      const charCode = text.charCodeAt(i);
      embedding[i] = (Math.sin(charCode * 0.1) + Math.cos(charCode * 0.05)) / 2;
    }
    // Normalize the embedding
    const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
    return embedding.map(val => magnitude > 0 ? val / magnitude : 0);
  }

  /**
   * Generate chat completions using Nebius LLMs
   * Supported models: deepseek-ai/DeepSeek-R1-0528, Qwen/Qwen3-235B-A22B, nvidia/Llama-3_1-Nemotron-Ultra-253B-v1
   *
   * @param request Model, messages and optional sampling settings.
   * @returns The parsed JSON response of the `/chat/completions` endpoint.
   * @throws Error when the request fails (see makeRequest).
   */
  async createChatCompletion(request: ChatCompletionRequest): Promise<any> {
    return this.makeRequest('/chat/completions', {
      method: 'POST',
      body: JSON.stringify({
        model: request.model || 'deepseek-ai/DeepSeek-R1-0528', // Default to DeepSeek
        messages: request.messages,
        // `??` rather than `||`: an explicit temperature of 0 must be honoured.
        temperature: request.temperature ?? 0.7,
        // `||` on purpose: 0 is not a usable token limit, so it also falls back.
        max_tokens: request.max_tokens || 1000,
        stream: request.stream ?? false
      })
    });
  }

  /**
   * Analyze document content using advanced LLM reasoning
   *
   * @param request Document text, analysis type and formatting preference.
   * @returns The cleaned analysis text plus the echoed analysis type and metadata.
   * @throws Error when the request fails or the response carries no message text.
   */
  async analyzeDocument(request: DocumentAnalysisRequest): Promise<any> {
    const basePrompts: Record<DocumentAnalysisRequest['analysisType'], string> = {
      summary: "You are an expert document summarizer. Create a concise, informative summary highlighting the key points and main conclusions.",
      classification: "You are a document classifier. Categorize this document into one of these types: academic_paper, technical_documentation, research_report, code_repository, blog_post, news_article. Explain your reasoning.",
      key_points: "You are an expert at extracting key information. Identify the most important points, findings, and conclusions from this document. Format as a structured list.",
      quality_score: "You are a document quality assessor. Evaluate this document's credibility, accuracy, and usefulness on a scale of 1-10. Explain your scoring criteria."
    };
    // Add formatting instructions based on user preference
    const formatInstruction = request.useMarkdown === false
      ? " IMPORTANT: Use only plain text formatting. Do not use any markdown syntax like **bold**, *italic*, #headers, or bullet points with */-. Use simple text with clear line breaks and numbering like 1., 2., 3. Keep it clean and readable without any special formatting characters."
      : " Use markdown formatting for better readability - use **bold** for emphasis, bullet points, and clear section headers.";
    const response = await this.createChatCompletion({
      model: 'deepseek-ai/DeepSeek-R1-0528',
      messages: [
        {
          role: 'system',
          content: basePrompts[request.analysisType] + formatInstruction
        },
        {
          role: 'user',
          content: `Please analyze this document:\n\n${request.content}`
        }
      ],
      temperature: 0.3,
      max_tokens: 1500
    });
    const content = this.extractMessageContent(response);
    if (content === undefined) {
      throw new Error('Nebius chat completion returned no message content');
    }
    // Clean up DeepSeek R1 thinking tags for better user experience
    let cleanedAnalysis = this.stripThinking(content);
    // Additional cleanup for plain text mode
    if (request.useMarkdown === false) {
      // Remove markdown formatting that might still appear.
      // Line-anchored markers go first so a leading "* " bullet is not mistaken
      // for the opening of an *italic* span.
      cleanedAnalysis = cleanedAnalysis
        .replace(/^[ \t]*#{1,6}[ \t]+/gm, '') // Remove # headers (line start only, so "C# " survives)
        .replace(/^\s*[\*\-\+]\s/gm, '') // Remove bullet points
        .replace(/\*\*(.*?)\*\*/g, '$1') // Remove **bold**
        .replace(/\*(.*?)\*/g, '$1') // Remove *italic*
        .replace(/^\s*\d+\.\s/gm, (match: string) => {
          // Keep numbered lists but ensure clean formatting
          return match.replace(/^\s*/, '');
        });
    }
    return {
      analysis: cleanedAnalysis.trim(),
      analysisType: request.analysisType,
      metadata: request.metadata
    };
  }

  /**
   * Enhance search queries using LLM understanding
   *
   * @param originalQuery The user's query.
   * @param context Optional extra context appended to the prompt.
   * @returns The model's structured answer, or a fallback built from the
   *   original query when the answer contains no parseable JSON object.
   * @throws Error when the request itself fails.
   */
  async enhanceQuery(originalQuery: string, context?: string): Promise<{
    enhancedQuery: string;
    intent: string;
    keywords: string[];
    suggestions: string[];
  }> {
    const response = await this.createChatCompletion({
      model: 'deepseek-ai/DeepSeek-R1-0528',
      messages: [
        {
          role: 'system',
          content: `You are a search query enhancement expert. Given a user query, improve it for better document retrieval by:
1. Identifying the search intent
2. Expanding with relevant keywords
3. Suggesting alternative queries
4. Reformulating for better semantic search
Respond in JSON format:
{
"enhancedQuery": "improved version of the query",
"intent": "what the user is trying to find",
"keywords": ["key", "terms", "to", "search"],
"suggestions": ["alternative query 1", "alternative query 2"]
}`
        },
        {
          role: 'user',
          content: `Original query: "${originalQuery}"${context ? `\nContext: ${context}` : ''}`
        }
      ],
      temperature: 0.4
    });
    // Fallback if JSON parsing fails
    return this.parseJsonResponse(response, {
      enhancedQuery: originalQuery,
      intent: 'information_search',
      keywords: originalQuery.split(' '),
      suggestions: [originalQuery]
    });
  }

  /**
   * Score citation relevance using LLM reasoning
   *
   * Only the document's title and snippet are sent to the model.
   *
   * @returns The model's structured answer, or a neutral default (score 0.5)
   *   when the answer contains no parseable JSON object.
   * @throws Error when the request itself fails.
   */
  async scoreCitationRelevance(query: string, document: {
    title: string;
    content: string;
    snippet: string;
  }): Promise<{
    relevanceScore: number;
    explanation: string;
    keyReasons: string[];
  }> {
    const response = await this.createChatCompletion({
      model: 'deepseek-ai/DeepSeek-R1-0528',
      messages: [
        {
          role: 'system',
          content: `You are a relevance scoring expert. Evaluate how relevant a document is to a user's query on a scale of 0-1. Consider:
- Semantic similarity
- Content alignment
- Topic relevance
- Information quality
Respond in JSON format:
{
"relevanceScore": 0.85,
"explanation": "brief explanation of the score",
"keyReasons": ["reason 1", "reason 2", "reason 3"]
}`
        },
        {
          role: 'user',
          content: `Query: "${query}"
Document:
Title: ${document.title}
Content Preview: ${document.snippet}
Please score the relevance of this document to the query.`
        }
      ],
      temperature: 0.2
    });
    return this.parseJsonResponse(response, {
      relevanceScore: 0.5,
      explanation: 'Unable to analyze relevance',
      keyReasons: ['Default scoring used']
    });
  }

  /**
   * Generate contextual research insights
   *
   * Each document contributes its title and the first 500 characters of its
   * content to the prompt.
   *
   * @returns The model's structured answer, or an empty synthesis when the
   *   answer contains no parseable JSON object.
   * @throws Error when the request itself fails.
   */
  async generateResearchInsights(documents: Array<{
    title: string;
    content: string;
    metadata?: any;
  }>, query: string): Promise<{
    synthesis: string;
    keyFindings: string[];
    gaps: string[];
    recommendations: string[];
  }> {
    const documentSummaries = documents.map((doc, i) =>
      `Document ${i + 1}: ${doc.title}\n${doc.content.substring(0, 500)}...`
    ).join('\n\n');
    const response = await this.createChatCompletion({
      model: 'deepseek-ai/DeepSeek-R1-0528',
      messages: [
        {
          role: 'system',
          content: `You are a research synthesis expert. Analyze multiple documents and provide comprehensive insights. Respond in JSON format:
{
"synthesis": "comprehensive synthesis of all documents",
"keyFindings": ["finding 1", "finding 2", "finding 3"],
"gaps": ["knowledge gap 1", "gap 2"],
"recommendations": ["recommendation 1", "recommendation 2"]
}`
        },
        {
          role: 'user',
          content: `Research Query: "${query}"
Documents to analyze:
${documentSummaries}
Please provide a comprehensive research synthesis.`
        }
      ],
      temperature: 0.5,
      max_tokens: 2000
    });
    return this.parseJsonResponse(response, {
      synthesis: 'Unable to generate synthesis',
      keyFindings: [] as string[],
      gaps: [] as string[],
      recommendations: [] as string[]
    });
  }
}
// Shared client instance, created when this module is first loaded; the
// constructor reads NEBIUS_API_KEY from the environment at that moment.
export const nebiusClient = new NebiusClient();
// Type-only re-exports for callers that build requests or consume responses.
export type { EmbeddingRequest, EmbeddingResponse, DocumentAnalysisRequest };