// KnowledgeBridge / server/routes_backup.ts
// fazeel007's picture
// initial commit
// 7c012de
import { Express } from "express";
import { createServer, Server } from "http";
import { z } from "zod";
import { storage } from "./storage";
import { searchRequestSchema } from "@shared/schema";
import OpenAI from "openai";
/**
 * The subset of a GitHub repository search hit that this module reads
 * when converting repositories into search documents.
 */
interface GitHubRepo {
  id: number;
  name: string;
  full_name: string;
  /** `null` when the repository has no description. */
  description: string | null;
  html_url: string;
  stargazers_count: number;
  /** `null` when GitHub reports no primary language for the repository. */
  language: string | null;
  topics: string[];
  created_at: string;
  updated_at: string;
}
// Module-wide OpenAI client; the API key is read from the OPENAI_API_KEY
// environment variable when this module is loaded.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Web search backed by the DuckDuckGo Instant Answer API.
 *
 * Returns the instant answer (when the response carries both an abstract and
 * its URL) followed by related topics, capped at `maxResults` entries in
 * total. Topic groups (entries that carry a nested `Topics` array instead of
 * their own text) are flattened so their members are considered as well.
 *
 * @param query      Free-text search query.
 * @param maxResults Maximum number of hits to return. Values that are not a
 *                   positive number yield an empty list without a request.
 * @returns Hits shaped as `{ title, content, url, source, type }`. An empty
 *          array is returned on HTTP errors or thrown failures, which are
 *          logged rather than propagated.
 */
async function searchWeb(query: string, maxResults: number = 10): Promise<any[]> {
  type WebHit = {
    title: string;
    content: string;
    url: string;
    source: string;
    type: 'instant_answer' | 'related_topic';
  };

  // A negative cap would otherwise reach Array.prototype.slice as a negative
  // end index and trim from the tail instead of limiting the result count.
  const limit = Number.isNaN(maxResults) ? 0 : Math.max(0, Math.floor(maxResults));
  if (limit === 0) {
    return [];
  }

  try {
    const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;
    const response = await fetch(searchUrl);
    if (!response.ok) {
      console.error('DuckDuckGo API error:', response.status);
      return [];
    }

    const data: any = await response.json();
    const results: WebHit[] = [];

    // Process instant answer
    if (data?.AbstractText && data?.AbstractURL) {
      results.push({
        title: data.Heading || query,
        content: data.AbstractText,
        url: data.AbstractURL,
        source: data.AbstractSource || 'Web Search',
        type: 'instant_answer'
      });
    }

    // Process related topics, expanding grouped entries into their members.
    const rawTopics: any[] = Array.isArray(data?.RelatedTopics) ? data.RelatedTopics : [];
    const topics = rawTopics.flatMap((entry: any) =>
      Array.isArray(entry?.Topics) ? entry.Topics : [entry]
    );

    for (const topic of topics) {
      // Count only entries that are actually emitted, so unusable topics
      // do not consume slots from the cap.
      if (results.length >= limit) {
        break;
      }
      if (typeof topic?.Text !== 'string' || !topic.Text || !topic.FirstURL) {
        continue;
      }
      results.push({
        title: topic.Text.split(' - ')[0] || topic.Text.substring(0, 60),
        content: topic.Text,
        url: topic.FirstURL,
        source: 'DuckDuckGo',
        type: 'related_topic'
      });
    }

    return results;
  } catch (error) {
    console.error('Web search error:', error);
    return [];
  }
}
/**
 * Converts one web search hit into the document shape returned by the
 * search endpoint.
 *
 * @param result Web hit exposing `title`, `content`, `url`, `source`, `type`.
 * @param rank   Zero-based position of the hit; drives the relevance score
 *               and is exposed one-based as `rank`.
 * @param query  The query that produced the hit.
 * @returns A document object with `sourceType` set to `'web'`.
 */
function transformWebResultToDocument(result: any, rank: number, query: string): any {
  const snippetLimit = 200;
  const body = result.content;

  // Long bodies are cut to the limit and marked with an ellipsis.
  let snippet = body;
  if (body.length > snippetLimit) {
    snippet = body.substring(0, snippetLimit) + '...';
  }

  const documentId = `web_${Date.now()}_${rank}`;
  const fetchedAt = new Date().toISOString();
  // The score drops by 0.15 per rank position and never goes below 0.4.
  const score = Math.max(0.4, 1 - (rank * 0.15));
  // Randomised value in [0.1, 0.3); it is not a measured duration.
  const reportedRetrievalTime = Math.random() * 0.2 + 0.1;
  // Token count is estimated as one token per four characters.
  const estimatedTokens = Math.floor(body.length / 4);

  return {
    id: documentId,
    title: result.title,
    content: body,
    snippet,
    source: result.source,
    sourceType: 'web',
    url: result.url,
    metadata: {
      search_type: result.type,
      fetched_at: fetchedAt
    },
    relevanceScore: score,
    rank: rank + 1,
    searchQuery: query,
    retrievalTime: reportedRetrievalTime,
    tokenCount: estimatedTokens
  };
}
/**
 * Searches GitHub repositories for `query`, ordered by stars.
 *
 * Query handling:
 * - `"<topic> by <author>"` searches `<topic> user:<author>` (whitespace in
 *   the author name removed). When that yields nothing, or GitHub rejects
 *   the `user:` qualifier with HTTP 422, progressively looser fallback
 *   queries are tried one after another; the first one with hits wins and
 *   hits mentioning the author are preferred when there are any.
 * - Queries mentioning "data structures" or "algorithm" get extra phrase
 *   terms and a Python language filter.
 * - Everything else is searched with a Python language filter.
 *
 * The `Authorization` header is sent only when `GITHUB_TOKEN` is set.
 *
 * @param query      Free-text query, optionally containing a "by <author>" clause.
 * @param maxResults Page size requested from GitHub.
 * @returns Raw repository objects from the API, or an empty array when the
 *          request fails (failures are logged, not thrown).
 */
async function searchGitHubRepos(query: string, maxResults: number = 10): Promise<any[]> {
  // The leading \b keeps words that merely end in "by" (e.g. "ruby on rails")
  // from being parsed as an author clause.
  const authorClause = /\bby\s+([a-zA-Z0-9_-]+(?:\s+[a-zA-Z0-9_-]+)*)/i;

  // Issues one repository search request for the given query string.
  const requestRepos = (q: string) => {
    const headers: Record<string, string> = {
      'Accept': 'application/vnd.github.v3+json',
      'User-Agent': 'Knowledge-Base-Browser'
    };
    const token = process.env.GITHUB_TOKEN;
    if (token) {
      // Without this guard an unset token would be sent as "token undefined".
      headers['Authorization'] = `token ${token}`;
    }
    return fetch(
      `https://api.github.com/search/repositories?q=${encodeURIComponent(q)}&sort=stars&order=desc&per_page=${maxResults}`,
      { headers }
    );
  };

  try {
    const lowerQuery = query.toLowerCase();
    const authorName = query.match(authorClause)?.[1]?.trim() ?? '';
    const hasAuthor = authorName.length > 0;
    const authorHandle = authorName.replace(/\s+/g, '');
    const topicPart = hasAuthor ? query.replace(authorClause, '').trim() : '';

    let searchQuery: string;
    if (hasAuthor) {
      searchQuery = `${topicPart} user:${authorHandle}`.trim();
    } else if (lowerQuery.includes('data structures') || lowerQuery.includes('algorithm')) {
      // Enhanced search for data structures and algorithms
      searchQuery = `${query} "data structures" OR "algorithms" language:python`;
    } else {
      searchQuery = `${query} language:python`;
    }

    console.log('GitHub search query:', searchQuery);
    const response = await requestRepos(searchQuery);

    let primaryItems: any[] = [];
    if (response.ok) {
      const data: any = await response.json();
      primaryItems = Array.isArray(data?.items) ? data.items : [];
    } else {
      console.error('GitHub API error:', response.status, response.statusText);
      // A 422 on an author search typically means the `user:` qualifier did
      // not resolve to an account; the looser fallbacks can still succeed.
      const canFallBack = hasAuthor && response.status === 422;
      if (!canFallBack) {
        return [];
      }
    }

    if (primaryItems.length > 0 || !hasAuthor) {
      return primaryItems;
    }

    // Fallback strategies without language restrictions. Empty strings and
    // repeats of the primary query are skipped.
    const fallbackQueries = Array.from(new Set(
      [
        `"${authorName}" ${topicPart}`,
        `${topicPart} "${authorName}"`,
        `${authorName} ${topicPart}`,
        topicPart
      ]
        .map(candidate => candidate.trim())
        .filter(candidate => candidate.length > 0 && candidate !== searchQuery)
    ));

    // Match both the name as typed and its whitespace-free form, since
    // account logins cannot contain spaces.
    const needles = Array.from(new Set([authorName.toLowerCase(), authorHandle.toLowerCase()]));
    const mentionsAuthor = (value: unknown): boolean =>
      typeof value === 'string' && needles.some(needle => value.toLowerCase().includes(needle));

    for (const fallbackQuery of fallbackQueries) {
      console.log('Trying fallback query:', fallbackQuery);
      const fallbackResponse = await requestRepos(fallbackQuery);
      if (!fallbackResponse.ok) {
        continue;
      }
      const fallbackData: any = await fallbackResponse.json();
      const items: any[] = Array.isArray(fallbackData?.items) ? fallbackData.items : [];
      if (items.length === 0) {
        continue;
      }
      // Prioritize hits that reference the requested author.
      const authorFilteredResults = items.filter((repo: any) =>
        mentionsAuthor(repo?.owner?.login) ||
        mentionsAuthor(repo?.full_name) ||
        mentionsAuthor(repo?.description)
      );
      return authorFilteredResults.length > 0 ? authorFilteredResults : items;
    }

    return [];
  } catch (error) {
    console.error('Error fetching GitHub repos:', error);
    return [];
  }
}
/**
 * Converts a GitHub repository search hit into the document shape returned
 * by the search endpoint.
 *
 * Missing optional data is tolerated: an absent description becomes
 * "No description available", an absent language is rendered as "Unknown"
 * in the text content, and absent topics are treated as an empty list.
 *
 * @param repo  Repository object from the GitHub search API.
 * @param rank  Zero-based position of the hit; drives the relevance score
 *              and is exposed one-based as `rank`.
 * @param query The query that produced the hit.
 * @returns A document object with `sourceType` set to `'code'`.
 */
function transformGitHubRepoToDocument(repo: GitHubRepo, rank: number, query: string): any {
  const description = repo.description || 'No description available';
  const snippet = repo.description
    ? repo.description.substring(0, 200) + (repo.description.length > 200 ? '...' : '')
    : 'No description available';
  // Guard against payloads without a topics array so one sparse hit cannot
  // throw and abort the whole result set.
  const topics: string[] = Array.isArray(repo.topics) ? repo.topics : [];
  // Avoid rendering the literal text "null" for repositories without a language.
  const languageLabel = repo.language || 'Unknown';

  return {
    id: repo.id,
    title: `${repo.name} - ${repo.full_name}`,
    content: `${description}\n\nRepository: ${repo.full_name}\nLanguage: ${languageLabel}\nStars: ${repo.stargazers_count}\nTopics: ${topics.join(', ')}\nCreated: ${repo.created_at}\nLast Updated: ${repo.updated_at}`,
    snippet,
    source: `GitHub Repository`,
    sourceType: 'code',
    url: repo.html_url,
    metadata: {
      stars: repo.stargazers_count,
      language: repo.language,
      topics,
      created_at: repo.created_at,
      updated_at: repo.updated_at
    },
    // The score drops by 0.1 per rank position and never goes below 0.5.
    relevanceScore: Math.max(0.5, 1 - (rank * 0.1)),
    rank: rank + 1,
    searchQuery: query,
    // Randomised value in [0.1, 0.4); it is not a measured duration.
    retrievalTime: Math.random() * 0.3 + 0.1,
    // One token per four description characters; 100 characters are
    // assumed when there is no description.
    tokenCount: Math.floor((repo.description?.length || 100) / 4)
  };
}
/**
 * Registers the API routes on `app` and returns an HTTP server wrapping it.
 *
 * Routes:
 * - `POST /api/search`   – searches local storage first; when fewer than
 *   three local hits are found, GitHub (code-like queries, only when
 *   `GITHUB_TOKEN` is set) and web search are queried in parallel and the
 *   merged list is sorted by relevance and cut to the requested limit.
 * - `POST /api/explain`  – asks the OpenAI chat API for a short spoken-style
 *   explanation of a document, given its `title` and `snippet`.
 * - `GET /api/documents` – pages through stored documents using the
 *   `limit` (default 50) and `offset` (default 0) query parameters.
 *
 * @param app Express application to attach the routes to.
 * @returns The created (not yet listening) HTTP server.
 */
export async function registerRoutes(app: Express): Promise<Server> {
  // Enhanced search with web fallback
  app.post("/api/search", async (req, res) => {
    try {
      const searchRequest = searchRequestSchema.parse(req.body);
      const startTime = Date.now();

      // First, search local storage
      const localResults = await storage.searchDocuments(searchRequest);
      let allDocuments = localResults.results || [];

      // If local results are insufficient, enhance with external sources
      const minResults = 3;
      if (allDocuments.length < minResults) {
        console.log(`Local search returned ${allDocuments.length} results, fetching external sources...`);
        const localCount = allDocuments.length;

        // Check if we should search GitHub for code-related queries
        const loweredQuery = searchRequest.query.toLowerCase();
        const codeKeywords = ['python', 'data structures', 'algorithm', 'repository', 'code'];
        const isCodeQuery = codeKeywords.some(keyword => loweredQuery.includes(keyword));

        // Parallel search of external sources
        const searchPromises: Promise<any[]>[] = [];
        if (isCodeQuery && process.env.GITHUB_TOKEN) {
          searchPromises.push(
            searchGitHubRepos(searchRequest.query, Math.min(5, searchRequest.limit))
              .then(repos => repos.map((repo, index) =>
                transformGitHubRepoToDocument(repo, index + localCount, searchRequest.query)
              ))
          );
        }

        // Include web search for broader coverage unless the remaining
        // budget is known to be exhausted. The negated comparison keeps the
        // search running if the limit is not a plain number
        // (NOTE(review): the schema is not visible here; confirm its type).
        const webSlots = Math.min(5, searchRequest.limit - localCount);
        if (!(webSlots <= 0)) {
          searchPromises.push(
            searchWeb(searchRequest.query, webSlots)
              .then(webResults => webResults.map((result, index) =>
                transformWebResultToDocument(result, index + localCount, searchRequest.query)
              ))
          );
        }

        // Wait for every external source; one failing source is logged and
        // skipped instead of failing the whole request.
        const outcomes = await Promise.allSettled(searchPromises);
        const flattenedResults: any[] = [];
        for (const outcome of outcomes) {
          if (outcome.status === 'fulfilled') {
            flattenedResults.push(...outcome.value);
          } else {
            console.error('External search failed:', outcome.reason);
          }
        }

        // Combine and sort all results by relevance; documents without a
        // score sort last instead of producing NaN comparisons.
        allDocuments = [...allDocuments, ...flattenedResults]
          .sort((a, b) => (b.relevanceScore ?? 0) - (a.relevanceScore ?? 0))
          .slice(0, searchRequest.limit);
      }

      const searchTime = (Date.now() - startTime) / 1000;
      const response = {
        results: allDocuments,
        totalCount: allDocuments.length,
        searchTime,
        query: searchRequest.query,
        queryId: Date.now()
      };
      res.json(response);
    } catch (error) {
      if (error instanceof z.ZodError) {
        res.status(400).json({ message: "Invalid search request", errors: error.errors });
      } else {
        console.error('Search error:', error);
        res.status(500).json({ message: "Internal server error" });
      }
    }
  });

  // AI explanation endpoint
  app.post("/api/explain", async (req, res) => {
    try {
      // A missing body is answered with the 400 below rather than a 500.
      const { title, snippet } = req.body ?? {};
      if (!title || !snippet) {
        return res.status(400).json({ message: "Title and snippet are required" });
      }

      const prompt = [
        'Explain this document in a clear, conversational way suitable for audio playback:',
        `Title: ${title}`,
        `Content: ${snippet}`,
        'Provide a brief, engaging explanation (2-3 sentences) that would be pleasant to listen to. Focus on the key concepts and practical value.'
      ].join('\n');

      const response = await openai.chat.completions.create({
        model: "gpt-4o", // the newest OpenAI model is "gpt-4o" which was released May 13, 2024. do not change this unless explicitly requested by the user
        messages: [{ role: "user", content: prompt }],
        max_tokens: 150,
        temperature: 0.7,
      });

      // Guard against an empty choices array instead of throwing.
      const explanation = response.choices[0]?.message?.content ?? null;
      res.json({ explanation });
    } catch (error) {
      console.error('AI explanation error:', error);
      res.status(500).json({ message: "Failed to generate explanation" });
    }
  });

  // Other routes...
  app.get("/api/documents", async (req, res) => {
    try {
      // Only plain string parameters are parsed (base 10); anything else,
      // including non-positive values, falls back to the defaults.
      const toInt = (raw: unknown): number =>
        typeof raw === 'string' ? Number.parseInt(raw, 10) : Number.NaN;
      const requestedLimit = toInt(req.query.limit);
      const requestedOffset = toInt(req.query.offset);
      const limit = requestedLimit > 0 ? requestedLimit : 50;
      const offset = requestedOffset > 0 ? requestedOffset : 0;

      const documents = await storage.getDocuments(limit, offset);
      res.json(documents);
    } catch (error) {
      console.error('Failed to fetch documents:', error);
      res.status(500).json({ message: "Failed to fetch documents" });
    }
  });

  const httpServer = createServer(app);
  return httpServer;
}