// Commit 10ac46e (fazeel007): Implement complete document upload and processing pipeline with Modal integration
import { pgTable, text, serial, integer, boolean, timestamp, real, jsonb } from "drizzle-orm/pg-core";
import { createInsertSchema } from "drizzle-zod";
import { z } from "zod";
/**
 * `documents` table: stored documents with their text content, source
 * information, optional uploaded-file details and processing state.
 */
export const documents = pgTable("documents", {
  id: serial("id").primaryKey(),
  title: text("title").notNull(),
  content: text("content").notNull(),
  source: text("source").notNull(),
  /** Kind of source: pdf, web, code, academic, image. */
  sourceType: text("source_type").notNull(),
  url: text("url"),
  /** Free-form metadata: author, date, tags, etc. */
  metadata: jsonb("metadata"),
  /** Vector embedding, stored as a JSON string. */
  embedding: text("embedding"),
  /** Local file path for uploaded files. */
  filePath: text("file_path"),
  /** Original file name. */
  fileName: text("file_name"),
  /** File size in bytes. */
  fileSize: integer("file_size"),
  /** MIME type of the uploaded file. */
  mimeType: text("mime_type"),
  /** One of: pending, processing, completed, failed. New rows start as "pending". */
  processingStatus: text("processing_status").default("pending").notNull(),
  /** Modal processing task ID. */
  modalTaskId: text("modal_task_id"),
  createdAt: timestamp("created_at").notNull().defaultNow(),
  processedAt: timestamp("processed_at"),
});
/**
 * `search_queries` table: one row per recorded search, holding the query
 * text, the search mode, optional filters and basic result statistics.
 */
export const searchQueries = pgTable("search_queries", {
  id: serial("id").primaryKey(),
  query: text("query").notNull(),
  /** One of: semantic, keyword, hybrid. Defaults to "semantic". */
  searchType: text("search_type").default("semantic").notNull(),
  filters: jsonb("filters"),
  resultsCount: integer("results_count").default(0),
  /** Search duration in seconds. */
  searchTime: real("search_time"),
  createdAt: timestamp("created_at").notNull().defaultNow(),
});
/**
 * `search_results` table: ties a row of `search_queries` to a row of
 * `documents`, together with the result's relevance score, snippet and rank.
 */
export const searchResults = pgTable("search_results", {
  id: serial("id").primaryKey(),
  /** Foreign key to `search_queries.id`. */
  queryId: integer("query_id")
    .notNull()
    .references(() => searchQueries.id),
  /** Foreign key to `documents.id`. */
  documentId: integer("document_id")
    .notNull()
    .references(() => documents.id),
  relevanceScore: real("relevance_score").notNull(),
  snippet: text("snippet").notNull(),
  rank: integer("rank").notNull(),
});
/**
 * `citations` table: citation text attached to a document, with an optional
 * page number and section.
 */
export const citations = pgTable("citations", {
  id: serial("id").primaryKey(),
  /** Foreign key to `documents.id`. */
  documentId: integer("document_id")
    .notNull()
    .references(() => documents.id),
  citationText: text("citation_text").notNull(),
  pageNumber: integer("page_number"),
  section: text("section"),
  createdAt: timestamp("created_at").notNull().defaultNow(),
});
// Insert schemas
/**
 * Zod schema for inserting into `documents`. The `id` and `createdAt`
 * fields are removed from the generated insert schema.
 */
export const insertDocumentSchema = createInsertSchema(documents).omit({ createdAt: true, id: true });
/**
 * Zod schema for inserting into `search_queries`. The `id` and `createdAt`
 * fields are removed from the generated insert schema.
 */
export const insertSearchQuerySchema = createInsertSchema(searchQueries).omit({ createdAt: true, id: true });
/**
 * Zod schema for inserting into `search_results`. Only the `id` field is
 * removed from the generated insert schema.
 */
export const insertSearchResultSchema = createInsertSchema(searchResults).omit({ id: true });
/**
 * Zod schema for inserting into `citations`. The `id` and `createdAt`
 * fields are removed from the generated insert schema.
 */
export const insertCitationSchema = createInsertSchema(citations).omit({ createdAt: true, id: true });
// Types
/** Row shape selected from the `documents` table. */
export type Document = (typeof documents)["$inferSelect"];
/** Value shape validated by `insertDocumentSchema`. */
export type InsertDocument = z.infer<typeof insertDocumentSchema>;

/** Row shape selected from the `search_queries` table. */
export type SearchQuery = (typeof searchQueries)["$inferSelect"];
/** Value shape validated by `insertSearchQuerySchema`. */
export type InsertSearchQuery = z.infer<typeof insertSearchQuerySchema>;

/** Row shape selected from the `search_results` table. */
export type SearchResult = (typeof searchResults)["$inferSelect"];
/** Value shape validated by `insertSearchResultSchema`. */
export type InsertSearchResult = z.infer<typeof insertSearchResultSchema>;

/** Row shape selected from the `citations` table. */
export type Citation = (typeof citations)["$inferSelect"];
/** Value shape validated by `insertCitationSchema`. */
export type InsertCitation = z.infer<typeof insertCitationSchema>;
// Search request/response types
/**
 * Validates an incoming search request.
 *
 * - `query`: string with at least one character.
 * - `searchType`: "semantic", "keyword" or "hybrid"; defaults to "semantic".
 * - `filters`: optional; may hold `sourceTypes` (array of strings) and a
 *   `dateRange` whose `start` / `end` are optional strings.
 * - `limit`: integer from 1 to 50; defaults to 10.
 * - `offset`: integer greater than or equal to 0; defaults to 0.
 */
export const searchRequestSchema = z.object({
  query: z.string().min(1),
  searchType: z.enum(["semantic", "keyword", "hybrid"]).default("semantic"),
  filters: z
    .object({
      sourceTypes: z.array(z.string()).optional(),
      dateRange: z
        .object({
          start: z.string().optional(),
          end: z.string().optional(),
        })
        .optional(),
    })
    .optional(),
  // Pagination values are counts/positions: reject fractional or infinite
  // numbers (e.g. 2.5) instead of letting them reach the query layer.
  limit: z.number().int().min(1).max(50).default(10),
  offset: z.number().int().min(0).default(0),
});
/** Parsed shape produced by `searchRequestSchema`, with defaults applied. */
export type SearchRequest = z.infer<typeof searchRequestSchema>;
/** Shape of a search response: the matched documents plus summary fields. */
export interface SearchResponse {
  /** Matched documents, each extended with its relevance score, snippet and rank. */
  results: (Document & {
    relevanceScore: number;
    snippet: string;
    rank: number;
  })[];
  totalCount: number;
  searchTime: number;
  /** The query string this response belongs to. */
  query: string;
  queryId: number;
}
/**
 * A `Document` extended with search-result fields (score, snippet, rank) and
 * an optional list of extra context passages.
 */
export interface DocumentWithContext extends Document {
  relevanceScore: number;
  snippet: string;
  rank: number;
  /** Optional extra passages; each has text, a section and an optional page number. */
  additionalContext?: {
    text: string;
    section: string;
    pageNumber?: number;
  }[];
}
// File upload schemas
/**
 * Validates the descriptive fields of a file upload.
 *
 * - `fileName`: string with at least one character.
 * - `fileSize`: integer greater than or equal to 1.
 * - `mimeType`: string with at least one character.
 * - `title`, `source`: optional strings.
 */
export const fileUploadSchema = z.object({
  fileName: z.string().min(1),
  // A size is a whole number; fractional values are rejected.
  // NOTE(review): `documents.fileSize` is an `integer` column; if this value
  // is stored there, an upper bound may also be needed — confirm.
  fileSize: z.number().int().min(1),
  mimeType: z.string().min(1),
  title: z.string().optional(),
  source: z.string().optional(),
});
/** Parsed shape produced by `fileUploadSchema`. */
export type FileUpload = z.infer<typeof fileUploadSchema>;
// Document processing schemas
/**
 * Validates a request to process a single document.
 *
 * - `documentId`: positive integer.
 * - `operations`: any of "extract_text", "build_index",
 *   "generate_embedding"; defaults to `["extract_text"]`.
 * - `indexName`: optional string.
 */
export const documentProcessingSchema = z.object({
  // `documents.id` is a `serial` column, so valid ids are positive integers;
  // reject fractions, zero and negatives during validation.
  documentId: z.number().int().positive(),
  operations: z
    .array(z.enum(["extract_text", "build_index", "generate_embedding"]))
    .default(["extract_text"]),
  indexName: z.string().optional(),
});
/** Parsed shape produced by `documentProcessingSchema`, with defaults applied. */
export type DocumentProcessing = z.infer<typeof documentProcessingSchema>;
// Batch processing schemas
/**
 * Validates a request to process several documents at once.
 *
 * - `documentIds`: array with at least one positive integer.
 * - `operations`: any of "extract_text", "build_index",
 *   "generate_embedding"; defaults to `["extract_text"]`.
 * - `indexName`: optional string.
 */
export const batchProcessingSchema = z.object({
  // `documents.id` is a `serial` column, so every id must be a positive
  // integer; reject fractions, zero and negatives during validation.
  documentIds: z.array(z.number().int().positive()).min(1),
  operations: z
    .array(z.enum(["extract_text", "build_index", "generate_embedding"]))
    .default(["extract_text"]),
  indexName: z.string().optional(),
});
/** Parsed shape produced by `batchProcessingSchema`, with defaults applied. */
export type BatchProcessing = z.infer<typeof batchProcessingSchema>;