KnowledgeBridge / server /document-routes.ts
fazeel007's picture
Fix index
f36d1f9
import { Router } from 'express';
import { upload, validateUpload, FileProcessor } from './file-upload';
import { documentProcessor } from './document-processor';
import { storage } from './storage';
import { fileUploadSchema, documentProcessingSchema, batchProcessingSchema } from '@shared/schema';
import path from 'path';
// Router instance onto which every endpoint in this module is registered;
// it is the module's default export.
const router = Router();
/**
 * Subset of the error fields reported by the upload middleware that the
 * upload route inspects when building a response.
 */
interface UploadErrorLike {
  code?: string;
  field?: string;
  message?: string;
}

/** JSON body sent to the client when the upload middleware rejects a request. */
interface UploadErrorBody {
  success: false;
  error: string;
  message: string;
}

/**
 * Map an upload middleware error to the JSON body returned to the client.
 * Known limit codes are checked first, then the "Unsupported file type"
 * message, and anything else falls back to a generic upload error.
 * Every case is answered with HTTP 400 by the caller.
 */
function describeUploadError(err: UploadErrorLike): UploadErrorBody {
  switch (err.code) {
    case 'LIMIT_FILE_SIZE':
      return {
        success: false,
        error: 'File too large',
        // NOTE(review): multer documents `field` as the form field name, so this
        // likely prints the field rather than the file name - confirm the wording.
        // The 50MB figure presumably mirrors the limit set in ./file-upload - verify.
        message: `File size exceeds the maximum limit of 50MB. File: ${err.field}`
      };
    case 'LIMIT_FILE_COUNT':
      return {
        success: false,
        error: 'Too many files',
        message: 'Maximum 10 files allowed per upload'
      };
    case 'LIMIT_UNEXPECTED_FILE':
      return {
        success: false,
        error: 'Unexpected file field',
        message: `Unexpected file field: ${err.field}`
      };
    default:
      break;
  }
  // File type rejections are recognised by their message text.
  if (err.message && err.message.includes('Unsupported file type')) {
    return {
      success: false,
      error: 'Unsupported file type',
      message: err.message
    };
  }
  return {
    success: false,
    error: 'File upload error',
    message: err.message || 'Unknown upload error'
  };
}

/** Derive the stored `sourceType` label from a file's MIME type. */
function resolveSourceType(mimeType: string): string {
  if (FileProcessor.isPdfFile(mimeType)) {
    return 'pdf';
  }
  if (FileProcessor.isImageFile(mimeType)) {
    return 'image';
  }
  if (mimeType.includes('text') || mimeType.includes('json')) {
    return 'text';
  }
  return 'document';
}

/**
 * Best-effort removal of uploaded files that ended up without a document
 * record. Failures are logged and never thrown, so cleanup cannot mask the
 * error that triggered it.
 */
async function discardUploadedFiles(files: Express.Multer.File[]): Promise<void> {
  await Promise.all(
    files.map(async (file) => {
      if (!file.path) {
        return;
      }
      try {
        await FileProcessor.deleteFile(file.path);
      } catch (cleanupError) {
        console.warn(`Failed to remove orphaned upload ${file.originalname}:`, cleanupError);
      }
    })
  );
}

/**
 * Upload documents (multiple files supported)
 *
 * Accepts up to 10 files in the `files` form field. Optional body fields
 * `title` and `source` override the values derived from each file name.
 * Responds 201 with the created document records, 400 when the upload
 * middleware rejects the request or no files arrive, and 500 when a document
 * record cannot be saved.
 */
router.post(
  '/upload',
  (req, res, next) => {
    // Run the upload middleware by hand so its errors can be turned into
    // structured JSON responses instead of reaching the default error handler.
    upload.array('files', 10)(req, res, (err) => {
      if (err) {
        console.error('Multer upload error:', err);
        res.status(400).json(describeUploadError(err));
        return;
      }
      next();
    });
  },
  validateUpload,
  async (req, res) => {
    const files = (req.files as Express.Multer.File[] | undefined) ?? [];
    if (files.length === 0) {
      res.status(400).json({
        success: false,
        error: 'No files received',
        message: 'No files were received by the server'
      });
      return;
    }

    // Records are pushed in file order, so `uploadedDocuments.length` is also
    // the index of the first file that has no record yet.
    const uploadedDocuments = [];
    try {
      console.log(`Processing ${files.length} uploaded files`);
      for (const file of files) {
        console.log(`Processing file: ${file.originalname}, size: ${file.size} bytes, type: ${file.mimetype}`);

        // Extract title from filename or use provided title
        const title = req.body.title || path.basename(file.originalname, path.extname(file.originalname));
        const source = req.body.source || `Uploaded file: ${file.originalname}`;
        const sourceType = resolveSourceType(file.mimetype);

        // Text files are read immediately; everything else keeps a placeholder.
        let content = 'Processing...';
        if (FileProcessor.isTextFile(file.mimetype)) {
          try {
            content = await FileProcessor.readTextFile(file.path);
          } catch (error) {
            console.warn(`Failed to read text file ${file.originalname}:`, error);
            content = 'Failed to read file content';
          }
        }

        // Create document record
        try {
          const document = await storage.createDocument({
            title,
            content,
            source,
            sourceType,
            url: null,
            metadata: {
              originalName: file.originalname,
              uploadedAt: new Date().toISOString(),
              mimeType: file.mimetype,
              size: file.size
            },
            embedding: null,
            filePath: file.path,
            fileName: file.originalname,
            fileSize: file.size,
            mimeType: file.mimetype,
            processingStatus: FileProcessor.requiresOCR(file.mimetype) ? 'pending' : 'completed'
          } as any);
          console.log(`Successfully created document record for ${file.originalname} with ID ${document.id}`);
          uploadedDocuments.push(document);
        } catch (dbError) {
          console.error(`Failed to create document record for ${file.originalname}:`, dbError);
          throw new Error(`Database error while saving ${file.originalname}: ${dbError instanceof Error ? dbError.message : 'Unknown database error'}`);
        }
      }

      res.status(201).json({
        success: true,
        message: `Successfully uploaded ${uploadedDocuments.length} document(s)`,
        documents: uploadedDocuments
      });
    } catch (error) {
      console.error('File upload error:', error);
      // The failing file and every file after it never received a record;
      // remove them so they do not linger on disk. Records created before the
      // failure are left in place.
      await discardUploadedFiles(files.slice(uploadedDocuments.length));
      res.status(500).json({
        success: false,
        error: 'File upload failed',
        message: error instanceof Error ? error.message : 'Unknown error occurred'
      });
    }
  }
);
/**
 * Process a single document
 *
 * Body: optional `operations` array (defaults to `['extract_text']`).
 * Responds 404 when the document does not exist, 500 when processing fails,
 * and otherwise returns the updated document with the processing time.
 *
 * Only purely numeric ids are handled here. Any other value is handed to the
 * next matching route, because this pattern is registered before
 * `/process/batch` and would otherwise capture that path with `id = 'batch'`.
 */
router.post('/process/:id', async (req, res, next) => {
  const rawId = req.params.id;
  const documentId = /^\d+$/.test(rawId) ? Number.parseInt(rawId, 10) : Number.NaN;
  if (!Number.isSafeInteger(documentId)) {
    next();
    return;
  }

  // True while the document sits in the 'processing' state without a final
  // status, so an unexpected error can still move it to 'failed'.
  let statusPending = false;
  try {
    const requestBody = req.body || {};
    const operations = requestBody.operations || ['extract_text'];

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({
        success: false,
        error: 'Document not found'
      });
      return;
    }

    // Update status to processing
    await storage.updateDocument(documentId, {
      processingStatus: 'processing'
    } as any);
    statusPending = true;

    // Process the document
    const result = await documentProcessor.processDocument(document, operations);

    if (!result.success) {
      // Update status to failed
      await storage.updateDocument(documentId, {
        processingStatus: 'failed'
      } as any);
      statusPending = false;
      res.status(500).json({
        success: false,
        error: 'Document processing failed',
        message: result.error,
        processingTime: result.processingTime
      });
      return;
    }

    // Update document with results; only fields the processor produced are written.
    const updateData: any = {
      processingStatus: 'completed',
      processedAt: new Date()
    };
    if (result.extractedText && result.extractedText !== document.content) {
      updateData.content = result.extractedText;
    }
    if (result.embeddings) {
      updateData.embedding = JSON.stringify(result.embeddings);
    }
    if (result.modalTaskId) {
      updateData.modalTaskId = result.modalTaskId;
    }

    const updatedDocument = await storage.updateDocument(documentId, updateData);
    statusPending = false;

    res.json({
      success: true,
      message: 'Document processed successfully',
      document: updatedDocument,
      processingTime: result.processingTime
    });
  } catch (error) {
    console.error('Document processing error:', error);
    if (statusPending) {
      // Best effort: do not leave the document stuck in 'processing'.
      try {
        await storage.updateDocument(documentId, {
          processingStatus: 'failed'
        } as any);
      } catch (statusError) {
        console.error(`Failed to mark document ${documentId} as failed:`, statusError);
      }
    }
    res.status(500).json({
      success: false,
      error: 'Processing request failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Batch process multiple documents
 *
 * The body is validated with `batchProcessingSchema`; a body the schema
 * rejects is answered with 400. Ids that do not resolve to a document are
 * skipped, and 404 is returned when none resolve.
 *
 * NOTE(review): a parameterised `/process/:id` route registered earlier also
 * matches this path unless that handler defers non-numeric ids - verify that
 * requests actually reach this handler.
 */
router.post('/process/batch', async (req, res) => {
  let request: ReturnType<typeof batchProcessingSchema.parse>;
  try {
    request = batchProcessingSchema.parse(req.body);
  } catch (validationError) {
    // An invalid body is a client error, not a server failure.
    res.status(400).json({
      success: false,
      error: 'Invalid batch processing request',
      message: validationError instanceof Error ? validationError.message : 'Request body failed validation'
    });
    return;
  }
  const { documentIds, operations = ['extract_text'] } = request;

  // Ids of documents that may be sitting in 'processing' without a final status.
  let inFlightIds: number[] = [];
  try {
    // Fetch all documents
    const documents = await Promise.all(
      documentIds.map(id => storage.getDocument(id))
    );
    // Drop both `undefined` and `null` so a missing document never reaches `doc.id`.
    const validDocuments = documents.filter(doc => doc != null) as any[];
    if (validDocuments.length === 0) {
      res.status(404).json({
        success: false,
        error: 'No valid documents found'
      });
      return;
    }

    // Update all documents to processing status
    inFlightIds = validDocuments.map(doc => doc.id);
    await Promise.all(
      validDocuments.map(doc =>
        storage.updateDocument(doc.id, { processingStatus: 'processing' } as any)
      )
    );

    // Process documents in batch
    const batchResult = await documentProcessor.batchProcessDocuments(validDocuments, operations);
    // From here on each document receives its own final status below.
    inFlightIds = [];

    // Update documents with results
    await Promise.all(
      batchResult.results.map(async (result) => {
        const updateData: any = {
          processingStatus: result.success ? 'completed' : 'failed',
          processedAt: new Date()
        };
        if (result.success) {
          if (result.extractedText) {
            updateData.content = result.extractedText;
          }
          if (result.embeddings) {
            updateData.embedding = JSON.stringify(result.embeddings);
          }
        }
        return storage.updateDocument(result.documentId, updateData);
      })
    );

    res.json({
      success: true,
      message: `Batch processing completed: ${batchResult.processedCount} successful, ${batchResult.failedCount} failed`,
      processedCount: batchResult.processedCount,
      failedCount: batchResult.failedCount,
      results: batchResult.results,
      totalProcessingTime: batchResult.totalProcessingTime
    });
  } catch (error) {
    console.error('Batch processing error:', error);
    if (inFlightIds.length > 0) {
      // Best effort: the batch aborted before producing results, so do not
      // leave its documents stuck in 'processing'.
      const outcomes = await Promise.allSettled(
        inFlightIds.map(id =>
          storage.updateDocument(id, { processingStatus: 'failed' } as any)
        )
      );
      outcomes.forEach((outcome, index) => {
        if (outcome.status === 'rejected') {
          console.error(`Failed to mark document ${inFlightIds[index]} as failed:`, outcome.reason);
        }
      });
    }
    res.status(500).json({
      success: false,
      error: 'Batch processing failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Build vector index from documents
 *
 * Body (all optional): `documentIds` - array of ids to index - and
 * `indexName` (defaults to 'research_papers_clean_v2'). Without an id array,
 * the first 1000 stored documents are loaded and those whose status is
 * 'completed' are indexed. Responds 400 when nothing is left to index.
 */
router.post('/index/build', async (req, res) => {
  try {
    // Tolerate a request without a parsed body: every field is optional.
    const { documentIds, indexName = 'research_papers_clean_v2' } = req.body || {};

    let documents;
    if (Array.isArray(documentIds)) {
      // Build index from specific documents
      const fetchedDocs = await Promise.all(
        documentIds.map((id: number) => storage.getDocument(id))
      );
      // Skip ids that did not resolve, whether reported as `undefined` or `null`.
      documents = fetchedDocs.filter(doc => doc != null) as any[];
    } else {
      // Build index from all completed documents
      documents = await storage.getDocuments(1000, 0);
      documents = documents.filter(doc => doc.processingStatus === 'completed');
    }

    if (documents.length === 0) {
      res.status(400).json({
        success: false,
        error: 'No processed documents available for indexing'
      });
      return;
    }

    const result = await documentProcessor.buildVectorIndex(documents, indexName);
    if (result.success) {
      res.json({
        success: true,
        message: 'Vector index built successfully',
        indexName: result.indexName,
        documentCount: result.documentCount
      });
    } else {
      res.status(500).json({
        success: false,
        error: 'Index building failed',
        message: result.error
      });
    }
  } catch (error) {
    console.error('Index building error:', error);
    res.status(500).json({
      success: false,
      error: 'Index building request failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Search vector index
 *
 * Body: `query` (required, non-empty string), `indexName` (defaults to
 * 'research_papers_clean_v2') and `maxResults` (positive integer, defaults
 * to 10). Responds 400 for a missing/invalid query or result limit.
 */
router.post('/search/vector', async (req, res) => {
  try {
    // A request without a parsed body is treated like an empty one so that it
    // is rejected with 400 below instead of failing while destructuring.
    const { query, indexName = 'research_papers_clean_v2', maxResults } = req.body || {};

    if (!query || typeof query !== 'string') {
      res.status(400).json({
        success: false,
        error: 'Query parameter is required and must be a string'
      });
      return;
    }

    // Accept numeric strings, but refuse anything that is not a positive integer.
    const resultLimit = Number(maxResults ?? 10);
    if (!Number.isInteger(resultLimit) || resultLimit < 1) {
      res.status(400).json({
        success: false,
        error: 'maxResults must be a positive integer'
      });
      return;
    }

    const result = await documentProcessor.searchVectorIndex(query, indexName, resultLimit);
    if (result.success) {
      res.json({
        success: true,
        query,
        indexName,
        results: result.results,
        totalFound: result.results?.length || 0
      });
    } else {
      res.status(500).json({
        success: false,
        error: 'Vector search failed',
        message: result.error
      });
    }
  } catch (error) {
    console.error('Vector search error:', error);
    res.status(500).json({
      success: false,
      error: 'Vector search request failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Get document processing status
 *
 * Responds 400 when `:id` is not a plain non-negative integer, 404 when no
 * such document exists, and otherwise returns a summary of the document's
 * processing-related fields.
 */
router.get('/status/:id', async (req, res) => {
  try {
    // Reject values such as 'abc' or '12abc' instead of querying storage with
    // NaN or a silently truncated id.
    const rawId = req.params.id;
    const documentId = /^\d+$/.test(rawId) ? Number.parseInt(rawId, 10) : Number.NaN;
    if (!Number.isSafeInteger(documentId)) {
      res.status(400).json({
        success: false,
        error: 'Invalid document id'
      });
      return;
    }

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({
        success: false,
        error: 'Document not found'
      });
      return;
    }

    // Several fields are read through an `any` view of the record, as in the
    // rest of this module.
    const details = document as any;
    res.json({
      success: true,
      document: {
        id: document.id,
        title: document.title,
        processingStatus: details.processingStatus,
        modalTaskId: details.modalTaskId,
        createdAt: document.createdAt,
        processedAt: details.processedAt,
        fileSize: details.fileSize,
        mimeType: details.mimeType
      }
    });
  } catch (error) {
    console.error('Status check error:', error);
    res.status(500).json({
      success: false,
      error: 'Status check failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Return the first non-empty string carried by a query-string value.
 * Repeated parameters arrive as arrays, in which case the first entry is used.
 */
function firstQueryString(raw: unknown): string | undefined {
  const candidate = Array.isArray(raw) ? raw[0] : raw;
  return typeof candidate === 'string' && candidate !== '' ? candidate : undefined;
}

/**
 * Parse a pagination query parameter as a non-negative integer, returning
 * `fallback` when the value is absent or not such a number.
 */
function parsePageParam(raw: unknown, fallback: number): number {
  const text = firstQueryString(raw);
  if (text === undefined || text.trim() === '') {
    return fallback;
  }
  const parsed = Number(text);
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Get all documents with filtering
 *
 * Query parameters: `limit` (default 50), `offset` (default 0), `sourceType`
 * and `processingStatus`. `sourceType` takes precedence over
 * `processingStatus`; pagination is only applied when neither filter is used.
 * `totalCount` is the number of documents in this response.
 */
router.get('/list', async (req, res) => {
  try {
    const limit = parsePageParam(req.query.limit, 50);
    const offset = parsePageParam(req.query.offset, 0);
    const sourceType = firstQueryString(req.query.sourceType);
    const processingStatus = firstQueryString(req.query.processingStatus);

    let documents;
    if (sourceType) {
      documents = await storage.getDocumentsBySourceType(sourceType);
    } else if (processingStatus && 'getDocumentsByProcessingStatus' in storage) {
      // Not every storage implementation offers this lookup, hence the runtime check.
      documents = await (storage as any).getDocumentsByProcessingStatus(processingStatus);
    } else {
      // NOTE(review): a `processingStatus` filter is ignored here when the
      // storage lacks the lookup above - confirm that is acceptable.
      documents = await storage.getDocuments(limit, offset);
    }

    res.json({
      success: true,
      documents,
      totalCount: documents.length
    });
  } catch (error) {
    console.error('Document list error:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to retrieve documents',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
/**
 * Delete a document and its file
 *
 * Responds 400 when `:id` is not a plain non-negative integer and 404 when no
 * such document exists. The record is removed first; the stored file is then
 * removed on a best-effort basis, so a missing or undeletable file can neither
 * block deletion of the record nor leave a record pointing at a removed file.
 */
router.delete('/:id', async (req, res) => {
  try {
    const rawId = req.params.id;
    const documentId = /^\d+$/.test(rawId) ? Number.parseInt(rawId, 10) : Number.NaN;
    if (!Number.isSafeInteger(documentId)) {
      res.status(400).json({
        success: false,
        error: 'Invalid document id'
      });
      return;
    }

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({
        success: false,
        error: 'Document not found'
      });
      return;
    }

    // Delete document record
    const deleted = await storage.deleteDocument(documentId);
    if (!deleted) {
      res.status(500).json({
        success: false,
        error: 'Failed to delete document'
      });
      return;
    }

    // Delete file if it exists; a failure here is logged but does not undo
    // the successful record deletion.
    const filePath = (document as any).filePath;
    if (filePath) {
      try {
        await FileProcessor.deleteFile(filePath);
      } catch (fileError) {
        console.warn(`Document ${documentId} was deleted but its file could not be removed:`, fileError);
      }
    }

    res.json({
      success: true,
      message: 'Document deleted successfully'
    });
  } catch (error) {
    console.error('Document deletion error:', error);
    res.status(500).json({
      success: false,
      error: 'Document deletion failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
// Default export: the router carrying all routes registered above.
export default router;