|
import { Router } from 'express'; |
|
import { upload, validateUpload, FileProcessor } from './file-upload'; |
|
import { documentProcessor } from './document-processor'; |
|
import { storage } from './storage'; |
|
import { fileUploadSchema, documentProcessingSchema, batchProcessingSchema } from '@shared/schema'; |
|
import path from 'path'; |
|
|
|
// Express router on which every document endpoint in this module is registered.
const router = Router(); |
|
|
|
|
|
|
|
|
|
/**
 * Translates an error passed to the upload callback into the `error` / `message`
 * pair sent back to the client.
 *
 * @param err - Upload error; only `code`, `field` and `message` are read.
 * @returns Short error label plus human-readable message for the response body.
 */
function describeUploadError(
  err: { code?: string; field?: string; message?: string }
): { error: string; message: string } {
  switch (err.code) {
    case 'LIMIT_FILE_SIZE':
      return {
        error: 'File too large',
        message: `File size exceeds the maximum limit of 50MB. File: ${err.field}`
      };
    case 'LIMIT_FILE_COUNT':
      return {
        error: 'Too many files',
        message: 'Maximum 10 files allowed per upload'
      };
    case 'LIMIT_UNEXPECTED_FILE':
      return {
        error: 'Unexpected file field',
        message: `Unexpected file field: ${err.field}`
      };
  }

  // Errors without a known code are recognised by their message text.
  if (err.message && err.message.includes('Unsupported file type')) {
    return { error: 'Unsupported file type', message: err.message };
  }

  return {
    error: 'File upload error',
    message: err.message || 'Unknown upload error'
  };
}

/**
 * Picks the `sourceType` stored on a document from the uploaded file's MIME type.
 *
 * @param mimetype - MIME type reported for the uploaded file.
 * @returns `'pdf'`, `'image'`, `'text'`, or `'document'` when nothing more specific matches.
 */
function resolveSourceType(mimetype: string): string {
  if (FileProcessor.isPdfFile(mimetype)) {
    return 'pdf';
  }
  if (FileProcessor.isImageFile(mimetype)) {
    return 'image';
  }
  if (mimetype.includes('text') || mimetype.includes('json')) {
    return 'text';
  }
  return 'document';
}

/**
 * POST /upload
 *
 * Accepts up to 10 files in the `files` multipart field and creates one document
 * record per file via `storage.createDocument`.
 *
 * Responses:
 * - 400 when the upload step reports an error or no files were received.
 * - 201 with the created documents on success.
 * - 500 when reading or persisting a file fails. Files that did not get a
 *   document record are removed from disk (best effort); records created before
 *   the failure are left in place.
 */
router.post(
  '/upload',
  (req, res, next) => {
    // Run the upload middleware manually so its errors can be answered with JSON.
    upload.array('files', 10)(req, res, (err) => {
      if (!err) {
        return next();
      }

      console.error('Multer upload error:', err);
      return res.status(400).json({ success: false, ...describeUploadError(err) });
    });
  },
  validateUpload,
  async (req, res) => {
    const files = Array.isArray(req.files) ? req.files : [];
    // Paths that a stored document now references; these must survive a later failure.
    const persistedPaths = new Set<string>();

    try {
      if (files.length === 0) {
        return res.status(400).json({
          success: false,
          error: 'No files received',
          message: 'No files were received by the server'
        });
      }

      console.log(`Processing ${files.length} uploaded files`);
      const uploadedDocuments: Awaited<ReturnType<typeof storage.createDocument>>[] = [];

      for (const file of files) {
        console.log(`Processing file: ${file.originalname}, size: ${file.size} bytes, type: ${file.mimetype}`);

        // Caller-supplied title/source win; otherwise derive them from the file name.
        const title = req.body.title || path.basename(file.originalname, path.extname(file.originalname));
        const source = req.body.source || `Uploaded file: ${file.originalname}`;
        const sourceType = resolveSourceType(file.mimetype);

        // Text files are read immediately; everything else keeps a placeholder.
        let content = 'Processing...';
        if (FileProcessor.isTextFile(file.mimetype)) {
          try {
            content = await FileProcessor.readTextFile(file.path);
          } catch (error) {
            console.warn(`Failed to read text file ${file.originalname}:`, error);
            content = 'Failed to read file content';
          }
        }

        try {
          const document = await storage.createDocument({
            title,
            content,
            source,
            sourceType,
            url: null,
            metadata: {
              originalName: file.originalname,
              uploadedAt: new Date().toISOString(),
              mimeType: file.mimetype,
              size: file.size
            },
            embedding: null,
            filePath: file.path,
            fileName: file.originalname,
            fileSize: file.size,
            mimeType: file.mimetype,
            processingStatus: FileProcessor.requiresOCR(file.mimetype) ? 'pending' : 'completed'
          } as any);

          console.log(`Successfully created document record for ${file.originalname} with ID ${document.id}`);
          persistedPaths.add(file.path);
          uploadedDocuments.push(document);
        } catch (dbError) {
          console.error(`Failed to create document record for ${file.originalname}:`, dbError);
          throw new Error(`Database error while saving ${file.originalname}: ${dbError instanceof Error ? dbError.message : 'Unknown database error'}`);
        }
      }

      res.status(201).json({
        success: true,
        message: `Successfully uploaded ${uploadedDocuments.length} document(s)`,
        documents: uploadedDocuments
      });
    } catch (error) {
      console.error('File upload error:', error);

      // Remove uploaded files that never got a document record so they do not
      // accumulate on disk. Cleanup failures are logged and never mask the
      // original error.
      for (const file of files) {
        if (!file.path || persistedPaths.has(file.path)) {
          continue;
        }
        try {
          await FileProcessor.deleteFile(file.path);
        } catch (cleanupError) {
          console.warn(`Failed to remove orphaned upload ${file.originalname}:`, cleanupError);
        }
      }

      res.status(500).json({
        success: false,
        error: 'File upload failed',
        message: error instanceof Error ? error.message : 'Unknown error occurred'
      });
    }
  }
);
|
|
|
|
|
|
|
|
|
/**
 * POST /process/:id
 *
 * Runs the requested operations (default `['extract_text']`) on one stored
 * document and writes the outcome back through `storage.updateDocument`.
 *
 * Responses:
 * - 404 when no document exists for the id.
 * - 200 with the updated document when processing succeeds.
 * - 500 when processing reports failure or an exception is thrown.
 *
 * Requests whose `:id` is not a plain decimal number are passed on with
 * `next('route')`. This path pattern also matches `/process/batch`, which is
 * registered later in this file, so without the hand-off batch requests would
 * be handled here with a NaN id.
 */
router.post('/process/:id', async (req, res, next) => {
  if (!/^\d+$/.test(req.params.id)) {
    return next('route');
  }

  const documentId = Number.parseInt(req.params.id, 10);
  // True while the stored status is 'processing' and no final status has been written.
  let statusPending = false;

  try {
    const operations = req.body?.operations || ['extract_text'];

    const document = await storage.getDocument(documentId);
    if (!document) {
      return res.status(404).json({
        success: false,
        error: 'Document not found'
      });
    }

    await storage.updateDocument(documentId, {
      processingStatus: 'processing'
    } as any);
    statusPending = true;

    const result = await documentProcessor.processDocument(document, operations);

    if (!result.success) {
      await storage.updateDocument(documentId, {
        processingStatus: 'failed'
      } as any);
      statusPending = false;

      return res.status(500).json({
        success: false,
        error: 'Document processing failed',
        message: result.error,
        processingTime: result.processingTime
      });
    }

    const updateData: any = {
      processingStatus: 'completed',
      processedAt: new Date()
    };

    // Only overwrite the content when extraction produced something different.
    if (result.extractedText && result.extractedText !== document.content) {
      updateData.content = result.extractedText;
    }

    if (result.embeddings) {
      updateData.embedding = JSON.stringify(result.embeddings);
    }

    if (result.modalTaskId) {
      updateData.modalTaskId = result.modalTaskId;
    }

    const updatedDocument = await storage.updateDocument(documentId, updateData);
    statusPending = false;

    res.json({
      success: true,
      message: 'Document processed successfully',
      document: updatedDocument,
      processingTime: result.processingTime
    });
  } catch (error) {
    console.error('Document processing error:', error);

    // Do not leave the document stuck in 'processing' after an unexpected
    // failure. This is best effort: a second failure here is only logged.
    if (statusPending) {
      try {
        await storage.updateDocument(documentId, {
          processingStatus: 'failed'
        } as any);
      } catch (statusError) {
        console.error(`Failed to mark document ${documentId} as failed:`, statusError);
      }
    }

    res.status(500).json({
      success: false,
      error: 'Processing request failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * POST /process/batch
 *
 * Validates the body with `batchProcessingSchema`, loads the referenced
 * documents, runs `documentProcessor.batchProcessDocuments` on those that
 * exist, and stores each per-document outcome.
 *
 * Responses:
 * - 400 when the body fails schema validation.
 * - 404 when none of the requested documents exist.
 * - 200 with counts and per-document results once the batch has run.
 * - 500 on any other failure.
 *
 * NOTE(review): this path also matches the `/process/:id` pattern registered
 * earlier in this file; requests only arrive here if that route defers to it
 * or this route is registered first.
 */
router.post('/process/batch', async (req, res) => {
  // Documents already flagged 'processing' whose batch run has not returned yet.
  let inFlight: any[] = [];

  try {
    const { documentIds, operations = ['extract_text'] } = batchProcessingSchema.parse(req.body);

    const documents = await Promise.all(
      documentIds.map(id => storage.getDocument(id))
    );

    // `!= null` drops both undefined and null lookups, so `doc.id` below is safe.
    const validDocuments = documents.filter(doc => doc != null) as any[];

    if (validDocuments.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'No valid documents found'
      });
    }

    await Promise.all(
      validDocuments.map(doc =>
        storage.updateDocument(doc.id, { processingStatus: 'processing' } as any)
      )
    );
    inFlight = validDocuments;

    const batchResult = await documentProcessor.batchProcessDocuments(validDocuments, operations);
    // From here on each document gets its own final status from the results.
    inFlight = [];

    await Promise.all(
      batchResult.results.map(async (result) => {
        const updateData: any = {
          processingStatus: result.success ? 'completed' : 'failed',
          processedAt: new Date()
        };

        if (result.success) {
          if (result.extractedText) {
            updateData.content = result.extractedText;
          }
          if (result.embeddings) {
            updateData.embedding = JSON.stringify(result.embeddings);
          }
        }

        return storage.updateDocument(result.documentId, updateData);
      })
    );

    res.json({
      success: true,
      message: `Batch processing completed: ${batchResult.processedCount} successful, ${batchResult.failedCount} failed`,
      processedCount: batchResult.processedCount,
      failedCount: batchResult.failedCount,
      results: batchResult.results,
      totalProcessingTime: batchResult.totalProcessingTime
    });
  } catch (error) {
    console.error('Batch processing error:', error);

    // If the batch run itself blew up, release the documents that were flagged
    // 'processing' so they do not stay in that state forever (best effort).
    if (inFlight.length > 0) {
      const resets = await Promise.allSettled(
        inFlight.map(async doc =>
          storage.updateDocument(doc.id, { processingStatus: 'failed' } as any)
        )
      );
      for (const reset of resets) {
        if (reset.status === 'rejected') {
          console.error('Failed to reset document status after batch error:', reset.reason);
        }
      }
    }

    // Presumably `batchProcessingSchema` is a Zod schema, whose `parse` throws an
    // error named 'ZodError' on invalid input - TODO confirm. Matching on the
    // name avoids importing the validation library here.
    if (error instanceof Error && error.name === 'ZodError') {
      return res.status(400).json({
        success: false,
        error: 'Invalid batch processing request',
        message: error.message
      });
    }

    res.status(500).json({
      success: false,
      error: 'Batch processing failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * POST /index/build
 *
 * Builds a vector index (default name `research_papers_clean_v2`) through
 * `documentProcessor.buildVectorIndex`.
 *
 * When the body carries a `documentIds` array, exactly those documents that can
 * be loaded are indexed. Otherwise the first 1000 documents are fetched and
 * only those whose `processingStatus` is `'completed'` are used.
 *
 * Responses: 400 when nothing is left to index, 200 on success, 500 when the
 * build reports failure or an exception is thrown.
 */
router.post('/index/build', async (req, res) => {
  try {
    const { documentIds: requestedIds, indexName: targetIndex = 'research_papers_clean_v2' } = req.body;

    let candidates;
    if (Array.isArray(requestedIds)) {
      // Explicit selection: look each id up and drop the ones that were not found.
      const lookups = requestedIds.map((id: number) => storage.getDocument(id));
      const found = await Promise.all(lookups);
      candidates = found.filter(doc => doc !== undefined) as any[];
    } else {
      // No selection given: fall back to already-processed documents.
      const firstPage = await storage.getDocuments(1000, 0);
      candidates = firstPage.filter(doc => doc.processingStatus === 'completed');
    }

    if (candidates.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'No processed documents available for indexing'
      });
    }

    const outcome = await documentProcessor.buildVectorIndex(candidates, targetIndex);

    if (!outcome.success) {
      res.status(500).json({
        success: false,
        error: 'Index building failed',
        message: outcome.error
      });
      return;
    }

    res.json({
      success: true,
      message: 'Vector index built successfully',
      indexName: outcome.indexName,
      documentCount: outcome.documentCount
    });
  } catch (failure) {
    console.error('Index building error:', failure);

    const detail = failure instanceof Error ? failure.message : 'Unknown error occurred';
    res.status(500).json({
      success: false,
      error: 'Index building request failed',
      message: detail
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * POST /search/vector
 *
 * Runs `documentProcessor.searchVectorIndex` for the `query` string in the
 * body. `indexName` defaults to `research_papers_clean_v2` and `maxResults`
 * to 10.
 *
 * Responses: 400 when `query` is missing, empty or not a string; 200 with the
 * matches on success; 500 when the search reports failure or throws.
 */
router.post('/search/vector', async (req, res) => {
  try {
    const { query, indexName = 'research_papers_clean_v2', maxResults = 10 } = req.body;

    // Reject anything that is not a non-empty string.
    const hasUsableQuery = typeof query === 'string' && query !== '';
    if (!hasUsableQuery) {
      return res.status(400).json({
        success: false,
        error: 'Query parameter is required and must be a string'
      });
    }

    const outcome = await documentProcessor.searchVectorIndex(query, indexName, maxResults);

    if (!outcome.success) {
      res.status(500).json({
        success: false,
        error: 'Vector search failed',
        message: outcome.error
      });
      return;
    }

    const hits = outcome.results;
    res.json({
      success: true,
      query,
      indexName,
      results: hits,
      totalFound: hits?.length || 0
    });
  } catch (failure) {
    console.error('Vector search error:', failure);

    const detail = failure instanceof Error ? failure.message : 'Unknown error occurred';
    res.status(500).json({
      success: false,
      error: 'Vector search request failed',
      message: detail
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * GET /status/:id
 *
 * Returns a summary of one document: id, title, processing status, modal task
 * id, timestamps, file size and MIME type.
 *
 * Responses:
 * - 400 when `:id` is not a plain decimal number.
 * - 404 when no document exists for the id.
 * - 200 with the summary otherwise.
 * - 500 when the lookup throws.
 */
router.get('/status/:id', async (req, res) => {
  try {
    // Validate up front so a malformed id never reaches storage as NaN.
    if (!/^\d+$/.test(req.params.id)) {
      return res.status(400).json({
        success: false,
        error: 'Invalid document ID'
      });
    }

    const documentId = Number.parseInt(req.params.id, 10);
    const document = await storage.getDocument(documentId);

    if (!document) {
      return res.status(404).json({
        success: false,
        error: 'Document not found'
      });
    }

    // Several fields are read through an untyped view, as the original code did
    // with per-field `as any` casts.
    const details: any = document;

    res.json({
      success: true,
      document: {
        id: document.id,
        title: document.title,
        processingStatus: details.processingStatus,
        modalTaskId: details.modalTaskId,
        createdAt: document.createdAt,
        processedAt: details.processedAt,
        fileSize: details.fileSize,
        mimeType: details.mimeType
      }
    });
  } catch (error) {
    console.error('Status check error:', error);
    res.status(500).json({
      success: false,
      error: 'Status check failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * Reads a paging value from a query-string entry.
 *
 * @param raw - Raw query value; may be a string, an array, an object or undefined.
 * @param fallback - Value used when `raw` is not a plain non-negative integer string.
 * @returns The parsed integer, or `fallback`.
 */
function parsePagingParam(raw: unknown, fallback: number): number {
  if (typeof raw !== 'string') {
    return fallback;
  }
  const trimmed = raw.trim();
  return /^\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) : fallback;
}

/**
 * GET /list
 *
 * Lists documents. Query parameters:
 * - `sourceType`: when given, returns the documents of that source type.
 * - `processingStatus`: used only when `sourceType` is absent and the storage
 *   backend exposes `getDocumentsByProcessingStatus`.
 * - `limit` (default 50) and `offset` (default 0): applied only when neither
 *   filter is used. Values that are not non-negative integers fall back to the
 *   defaults instead of reaching storage as NaN, negative or fractional numbers.
 *
 * `totalCount` is the number of documents in this response.
 *
 * Responses: 200 with the documents, 500 when the lookup throws.
 */
router.get('/list', async (req, res) => {
  try {
    const { sourceType, processingStatus } = req.query;
    const limit = parsePagingParam(req.query.limit, 50);
    const offset = parsePagingParam(req.query.offset, 0);

    // Query values can also be arrays or nested objects (e.g. `?sourceType=a&sourceType=b`);
    // only plain non-empty strings are accepted as filters.
    const sourceTypeFilter = typeof sourceType === 'string' && sourceType !== '' ? sourceType : undefined;
    const statusFilter = typeof processingStatus === 'string' && processingStatus !== '' ? processingStatus : undefined;

    let documents;

    if (sourceTypeFilter !== undefined) {
      documents = await storage.getDocumentsBySourceType(sourceTypeFilter);
    } else if (statusFilter !== undefined && 'getDocumentsByProcessingStatus' in storage) {
      documents = await (storage as any).getDocumentsByProcessingStatus(statusFilter);
    } else {
      documents = await storage.getDocuments(limit, offset);
    }

    res.json({
      success: true,
      documents,
      totalCount: documents.length
    });
  } catch (error) {
    console.error('Document list error:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to retrieve documents',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
|
|
|
|
|
|
|
|
|
/**
 * DELETE /:id
 *
 * Deletes a document record and then, if the record had a `filePath`, the
 * file behind it.
 *
 * The record is removed first. Failing to remove the file afterwards is logged
 * but does not fail the request, so a missing or locked file can never make a
 * document undeletable, and a failed record deletion never leaves a record
 * pointing at a file that is already gone.
 *
 * Responses:
 * - 400 when `:id` is not a plain decimal number.
 * - 404 when no document exists for the id.
 * - 200 when the record was deleted.
 * - 500 when storage reports the deletion failed or an exception is thrown.
 */
router.delete('/:id', async (req, res) => {
  try {
    if (!/^\d+$/.test(req.params.id)) {
      return res.status(400).json({
        success: false,
        error: 'Invalid document ID'
      });
    }

    const documentId = Number.parseInt(req.params.id, 10);
    const document = await storage.getDocument(documentId);

    if (!document) {
      return res.status(404).json({
        success: false,
        error: 'Document not found'
      });
    }

    // Remember the path before the record disappears.
    const filePath = (document as any).filePath;

    const deleted = await storage.deleteDocument(documentId);

    if (!deleted) {
      return res.status(500).json({
        success: false,
        error: 'Failed to delete document'
      });
    }

    if (filePath) {
      try {
        await FileProcessor.deleteFile(filePath);
      } catch (fileError) {
        // The record is already gone; an orphaned file is only worth a warning.
        console.warn(`Failed to delete file for document ${documentId}:`, fileError);
      }
    }

    res.json({
      success: true,
      message: 'Document deleted successfully'
    });
  } catch (error) {
    console.error('Document deletion error:', error);
    res.status(500).json({
      success: false,
      error: 'Document deletion failed',
      message: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
});
|
|
|
export default router; |