|
import { Express } from "express"; |
|
import { createServer, Server } from "http"; |
|
import { z } from "zod"; |
|
import { storage } from "./storage"; |
|
import { searchRequestSchema } from "@shared/schema"; |
|
import OpenAI from "openai"; |
|
|
|
/**
 * Subset of a GitHub repository search item that this module reads when
 * converting repositories into documents.
 */
interface GitHubRepo {
  /** Numeric repository id. */
  id: number;
  /** Short repository name. */
  name: string;
  /** Full repository name as reported by GitHub (e.g. `owner/repo`). */
  full_name: string;
  /** Free-text description; GitHub reports `null` when none has been set. */
  description: string | null;
  /** Browser URL of the repository. */
  html_url: string;
  /** Star count. */
  stargazers_count: number;
  /** Primary language; GitHub reports `null` when none is detected. */
  language: string | null;
  /** Topic tags attached to the repository. */
  topics: string[];
  /** Creation timestamp string as returned by the API. */
  created_at: string;
  /** Last-update timestamp string as returned by the API. */
  updated_at: string;
}
|
|
|
// Module-level OpenAI client used by the /api/explain route. The API key is
// read from the OPENAI_API_KEY environment variable when this module loads.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
|
|
|
/**
 * Queries the DuckDuckGo JSON endpoint for `query` and normalizes the
 * response into a flat list of result objects.
 *
 * The instant answer (when the response carries both `AbstractText` and
 * `AbstractURL`) comes first, followed by related topics that have both a
 * `Text` and a `FirstURL`.
 *
 * @param query - Free-text search string; URL-encoded before being sent.
 * @param maxResults - Upper bound on the number of results returned. Values
 *   below 1 (or NaN) yield an empty list without issuing a request.
 * @returns Objects with `title`, `content`, `url`, `source` and `type`
 *   (`'instant_answer'` or `'related_topic'`). Returns `[]` on any HTTP or
 *   network failure; this function never rejects.
 */
async function searchWeb(query: string, maxResults: number = 10): Promise<any[]> {
  // Normalize the cap up front. A negative value used as a slice/loop bound
  // would otherwise select "all but the last N" entries instead of none.
  const limit = Number.isNaN(maxResults) ? 0 : Math.max(0, Math.floor(maxResults));
  if (limit === 0) {
    return [];
  }

  try {
    const searchUrl = `https://api.duckduckgo.com/?q=${encodeURIComponent(query)}&format=json&no_html=1&skip_disambig=1`;

    const response = await fetch(searchUrl);
    if (!response.ok) {
      console.error('DuckDuckGo API error:', response.status);
      return [];
    }

    const data = await response.json();
    const results: Array<Record<string, string>> = [];

    if (data.AbstractText && data.AbstractURL) {
      results.push({
        title: data.Heading || query,
        content: data.AbstractText,
        url: data.AbstractURL,
        source: data.AbstractSource || 'Web Search',
        type: 'instant_answer',
      });
    }

    if (Array.isArray(data.RelatedTopics)) {
      for (const topic of data.RelatedTopics) {
        if (results.length >= limit) {
          break;
        }
        // Entries without a text/URL pair are skipped *before* counting
        // against the cap, so they do not consume result slots.
        if (!topic || typeof topic.Text !== 'string' || !topic.Text || !topic.FirstURL) {
          continue;
        }
        results.push({
          title: topic.Text.split(' - ')[0] || topic.Text.substring(0, 60),
          content: topic.Text,
          url: topic.FirstURL,
          source: 'DuckDuckGo',
          type: 'related_topic',
        });
      }
    }

    return results;
  } catch (error) {
    // Best-effort source: log and degrade to "no results".
    console.error('Web search error:', error);
    return [];
  }
}
|
|
|
|
|
/**
 * Converts one web search hit into the document shape used in search
 * responses.
 *
 * @param result - Hit carrying `title`, `content`, `url`, `source`, `type`.
 * @param rank - Zero-based position; emitted one-based as `rank` and used to
 *   derive `relevanceScore` (floored at 0.4).
 * @param query - Search string echoed back as `searchQuery`.
 * @returns The document object; `id`, `metadata.fetched_at` and
 *   `retrievalTime` vary from call to call.
 */
function transformWebResultToDocument(result: any, rank: number, query: string): any {
  const body = result.content;

  // Snippets are capped at 200 characters, with an ellipsis when truncated.
  let snippet = body;
  if (body.length > 200) {
    snippet = body.substring(0, 200) + '...';
  }

  const documentId = `web_${Date.now()}_${rank}`;
  const fetchedAt = new Date().toISOString();
  const positionalScore = 1 - (rank * 0.15);

  return {
    id: documentId,
    title: result.title,
    content: body,
    snippet: snippet,
    source: result.source,
    sourceType: 'web',
    url: result.url,
    metadata: { search_type: result.type, fetched_at: fetchedAt },
    relevanceScore: Math.max(0.4, positionalScore),
    rank: rank + 1,
    searchQuery: query,
    retrievalTime: Math.random() * 0.2 + 0.1,
    tokenCount: Math.floor(body.length / 4),
  };
}
|
|
|
/**
 * Matches a "by <author>" clause in a query. The leading `\b` keeps words
 * that merely end in "by" (e.g. "ruby on rails") from being read as an
 * author clause. Not global, so it is safe to reuse across calls.
 */
const GITHUB_AUTHOR_CLAUSE = /\bby\s+([a-zA-Z0-9_-]+(?:\s+[a-zA-Z0-9_-]+)*)/i;

/**
 * Runs one GitHub repository search, sorted by stars descending.
 *
 * @returns The `items` array (possibly empty), or `null` when GitHub answered
 *   with a non-OK status.
 */
async function fetchGitHubRepoSearch(searchQuery: string, perPage: number): Promise<any[] | null> {
  const headers: Record<string, string> = {
    'Accept': 'application/vnd.github.v3+json',
    'User-Agent': 'Knowledge-Base-Browser',
  };
  // Only authenticate when a token is configured; otherwise the header would
  // literally read "token undefined".
  const token = process.env.GITHUB_TOKEN;
  if (token) {
    headers['Authorization'] = `token ${token}`;
  }

  const response = await fetch(
    `https://api.github.com/search/repositories?q=${encodeURIComponent(searchQuery)}&sort=stars&order=desc&per_page=${perPage}`,
    { headers },
  );

  if (!response.ok) {
    console.error('GitHub API error:', response.status, response.statusText);
    return null;
  }

  const data = await response.json();
  return Array.isArray(data?.items) ? data.items : [];
}

/**
 * Searches GitHub repositories for `query`.
 *
 * Queries without a "by <author>" clause are restricted to Python
 * repositories; queries mentioning "data structures" or "algorithm" also get
 * those phrases appended. Queries with an author clause first search
 * `user:<author>` (whitespace removed from the author name). If that search
 * fails or finds nothing, progressively looser fallback queries are tried in
 * order and the first one with results wins, preferring repositories whose
 * owner, full name or description mention the author.
 *
 * @param query - Free-text query, optionally containing "by <author>".
 * @param maxResults - Requested page size; values below 1 (or NaN) yield `[]`
 *   without a request, larger values are capped at 100.
 * @returns Raw repository items from the GitHub response, or `[]` when
 *   nothing is found or any request fails. Never rejects.
 */
async function searchGitHubRepos(query: string, maxResults: number = 10): Promise<any[]> {
  try {
    // `!(x >= 1)` also rejects NaN.
    if (!(maxResults >= 1)) {
      return [];
    }
    const perPage = Math.min(100, Math.floor(maxResults));

    const byAuthorMatch = query.match(GITHUB_AUTHOR_CLAUSE);

    if (!byAuthorMatch) {
      const lowerQuery = query.toLowerCase();
      const searchQuery =
        lowerQuery.includes('data structures') || lowerQuery.includes('algorithm')
          ? `${query} "data structures" OR "algorithms" language:python`
          : `${query} language:python`;

      console.log('GitHub search query:', searchQuery);
      return (await fetchGitHubRepoSearch(searchQuery, perPage)) ?? [];
    }

    const authorName = (byAuthorMatch[1] ?? '').trim();
    const authorLogin = authorName.replace(/\s+/g, '');
    const topicPart = query.replace(GITHUB_AUTHOR_CLAUSE, '').trim();

    const primaryQuery = `${topicPart} user:${authorLogin}`.trim();
    console.log('GitHub search query:', primaryQuery);

    // A failed primary search (null) is treated like an empty one so the
    // fallbacks still run, e.g. when GitHub rejects a `user:` qualifier for
    // an account it cannot resolve.
    const primaryItems = await fetchGitHubRepoSearch(primaryQuery, perPage);
    if (primaryItems !== null && primaryItems.length > 0) {
      return primaryItems;
    }

    // Drop empty queries and anything already tried.
    const tried = new Set<string>([primaryQuery]);
    const fallbackQueries = [
      `"${authorName}" ${topicPart}`,
      `${topicPart} "${authorName}"`,
      `${authorName} ${topicPart}`,
      topicPart,
    ]
      .map((candidate) => candidate.trim())
      .filter((candidate) => {
        if (candidate === '' || tried.has(candidate)) {
          return false;
        }
        tried.add(candidate);
        return true;
      });

    const needles = [authorName.toLowerCase(), authorLogin.toLowerCase()].filter(
      (needle) => needle !== '',
    );
    const mentionsAuthor = (value: unknown): boolean =>
      typeof value === 'string' &&
      needles.some((needle) => value.toLowerCase().includes(needle));

    // Sequential on purpose: stop at the first fallback that yields results.
    for (const fallbackQuery of fallbackQueries) {
      console.log('Trying fallback query:', fallbackQuery);

      const fallbackItems = await fetchGitHubRepoSearch(fallbackQuery, perPage);
      if (fallbackItems === null || fallbackItems.length === 0) {
        continue;
      }

      const authorFilteredResults = fallbackItems.filter(
        (repo: any) =>
          mentionsAuthor(repo?.owner?.login) ||
          mentionsAuthor(repo?.full_name) ||
          mentionsAuthor(repo?.description),
      );

      return authorFilteredResults.length > 0 ? authorFilteredResults : fallbackItems;
    }

    return [];
  } catch (error) {
    console.error('Error fetching GitHub repos:', error);
    return [];
  }
}
|
|
|
/**
 * Converts a GitHub repository search item into the document shape used in
 * search responses.
 *
 * Missing data is tolerated: an absent description becomes
 * "No description available", an absent language is shown as "Unknown" in
 * the content text, and absent topics are treated as an empty list.
 *
 * @param repo - Repository item from the GitHub search response.
 * @param rank - Zero-based position; emitted one-based as `rank` and used to
 *   derive `relevanceScore` (floored at 0.5).
 * @param query - Search string echoed back as `searchQuery`.
 * @returns The document object; `retrievalTime` varies from call to call.
 */
function transformGitHubRepoToDocument(repo: GitHubRepo, rank: number, query: string): any {
  const description = repo.description || 'No description available';

  // Snippets are capped at 200 characters, with an ellipsis when truncated.
  const snippet =
    description.length > 200 ? `${description.substring(0, 200)}...` : description;

  // Without these fallbacks the content would read "Language: null" and
  // `.join` would throw on a missing topics array.
  const languageLabel = repo.language ?? 'Unknown';
  const topics = Array.isArray(repo.topics) ? repo.topics : [];

  return {
    id: repo.id,
    title: `${repo.name} - ${repo.full_name}`,
    content: `${description}\n\nRepository: ${repo.full_name}\nLanguage: ${languageLabel}\nStars: ${repo.stargazers_count}\nTopics: ${topics.join(', ')}\nCreated: ${repo.created_at}\nLast Updated: ${repo.updated_at}`,
    snippet,
    source: `GitHub Repository`,
    sourceType: 'code',
    url: repo.html_url,
    metadata: {
      stars: repo.stargazers_count,
      // Raw value is kept in metadata so consumers can tell "unknown" apart.
      language: repo.language,
      topics,
      created_at: repo.created_at,
      updated_at: repo.updated_at,
    },
    relevanceScore: Math.max(0.5, 1 - (rank * 0.1)),
    rank: rank + 1,
    searchQuery: query,
    retrievalTime: Math.random() * 0.3 + 0.1,
    // Rough estimate from the description length; 100 characters are assumed
    // when there is no description.
    tokenCount: Math.floor((repo.description?.length || 100) / 4),
  };
}
|
|
|
/**
 * Registers the API routes on `app` and wraps it in an HTTP server.
 *
 * Routes:
 * - `POST /api/search`: validates the body with `searchRequestSchema`,
 *   searches local storage and, when fewer than three local hits come back,
 *   tops up from GitHub (code-like queries with a configured token) and the
 *   web before sorting by relevance and truncating to `limit`.
 * - `POST /api/explain`: asks the OpenAI chat API for a short spoken-style
 *   explanation of a document's title and snippet.
 * - `GET /api/documents`: pages through stored documents.
 *
 * @param app - Express application to attach the routes to.
 * @returns The HTTP server created around `app`; it is not started here.
 */
export async function registerRoutes(app: Express): Promise<Server> {
  app.post("/api/search", async (req, res) => {
    try {
      const searchRequest = searchRequestSchema.parse(req.body);
      const startTime = Date.now();

      const localResults = await storage.searchDocuments(searchRequest);
      let allDocuments = localResults.results || [];

      const minResults = 3;
      if (allDocuments.length < minResults) {
        console.log(`Local search returned ${allDocuments.length} results, fetching external sources...`);

        const localCount = allDocuments.length;
        const lowerQuery = searchRequest.query.toLowerCase();
        const isCodeQuery = ['python', 'data structures', 'algorithm', 'repository', 'code']
          .some((keyword) => lowerQuery.includes(keyword));

        const searchPromises: Promise<any[]>[] = [];

        if (isCodeQuery && process.env.GITHUB_TOKEN) {
          searchPromises.push(
            searchGitHubRepos(searchRequest.query, Math.min(5, searchRequest.limit))
              .then((repos) => repos.map((repo, index) =>
                transformGitHubRepoToDocument(repo, index + localCount, searchRequest.query)
              ))
          );
        }

        // Only query the web when there is room left under `limit`; a zero or
        // negative budget must not be forwarded as a result cap.
        const webBudget = Math.min(5, searchRequest.limit - localCount);
        if (webBudget > 0) {
          searchPromises.push(
            searchWeb(searchRequest.query, webBudget)
              .then((webResults) => webResults.map((result, index) =>
                transformWebResultToDocument(result, index + localCount, searchRequest.query)
              ))
          );
        }

        const externalResults = await Promise.all(searchPromises);
        const flattenedResults = externalResults.flat();

        allDocuments = [...allDocuments, ...flattenedResults]
          .sort((a, b) => b.relevanceScore - a.relevanceScore)
          .slice(0, searchRequest.limit);
      }

      const searchTime = (Date.now() - startTime) / 1000;
      const response = {
        results: allDocuments,
        totalCount: allDocuments.length,
        searchTime,
        query: searchRequest.query,
        queryId: Date.now()
      };

      res.json(response);
    } catch (error) {
      if (error instanceof z.ZodError) {
        res.status(400).json({ message: "Invalid search request", errors: error.errors });
      } else {
        console.error('Search error:', error);
        res.status(500).json({ message: "Internal server error" });
      }
    }
  });

  app.post("/api/explain", async (req, res) => {
    try {
      // `?? {}` turns a missing body into a 400 instead of a destructuring
      // error that would surface as a 500.
      const { title, snippet } = req.body ?? {};

      if (!title || !snippet) {
        return res.status(400).json({ message: "Title and snippet are required" });
      }

      const prompt = `Explain this document in a clear, conversational way suitable for audio playback:

Title: ${title}
Content: ${snippet}

Provide a brief, engaging explanation (2-3 sentences) that would be pleasant to listen to. Focus on the key concepts and practical value.`;

      const response = await openai.chat.completions.create({
        model: "gpt-4o",
        messages: [{ role: "user", content: prompt }],
        max_tokens: 150,
        temperature: 0.7,
      });

      const explanation = response.choices[0].message.content;
      res.json({ explanation });
    } catch (error) {
      console.error('AI explanation error:', error);
      res.status(500).json({ message: "Failed to generate explanation" });
    }
  });

  app.get("/api/documents", async (req, res) => {
    try {
      // Explicit radix; NaN, zero and negative values fall back to defaults
      // so storage never sees a negative limit or offset.
      const requestedLimit = parseInt(req.query.limit as string, 10);
      const requestedOffset = parseInt(req.query.offset as string, 10);
      const limit = requestedLimit > 0 ? requestedLimit : 50;
      const offset = requestedOffset > 0 ? requestedOffset : 0;

      const documents = await storage.getDocuments(limit, offset);
      res.json(documents);
    } catch (error) {
      console.error('Documents fetch error:', error);
      res.status(500).json({ message: "Failed to fetch documents" });
    }
  });

  const httpServer = createServer(app);
  return httpServer;
}