File size: 5,191 Bytes
7c012de 10ac46e 7c012de 10ac46e 7c012de 10ac46e 7c012de 10ac46e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import { pgTable, text, serial, integer, boolean, timestamp, real, jsonb } from "drizzle-orm/pg-core";
import { createInsertSchema } from "drizzle-zod";
import { z } from "zod";
/**
 * `documents` table: stored document text together with its source,
 * optional uploaded-file details, and processing state.
 */
export const documents = pgTable("documents", {
  id: serial("id").primaryKey(),

  // Core content
  title: text("title").notNull(),
  content: text("content").notNull(),
  source: text("source").notNull(),
  // Kind of source: pdf, web, code, academic, image
  sourceType: text("source_type").notNull(),
  url: text("url"),
  // Free-form metadata: author, date, tags, etc.
  metadata: jsonb("metadata"),
  // Vector embedding, stored as a JSON string
  embedding: text("embedding"),

  // Uploaded-file details (all nullable)
  // Local file path for uploaded files
  filePath: text("file_path"),
  // Original file name
  fileName: text("file_name"),
  // File size in bytes
  fileSize: integer("file_size"),
  // MIME type of the uploaded file
  mimeType: text("mime_type"),

  // Processing state: pending, processing, completed, failed
  processingStatus: text("processing_status").notNull().default("pending"),
  // Modal processing task ID
  modalTaskId: text("modal_task_id"),

  // Timestamps
  createdAt: timestamp("created_at").notNull().defaultNow(),
  processedAt: timestamp("processed_at"),
});
/**
 * `search_queries` table: one row per recorded search, with the query text,
 * the search mode, any filters, and result statistics.
 */
export const searchQueries = pgTable("search_queries", {
  id: serial("id").primaryKey(),
  query: text("query").notNull(),
  // Search mode: semantic, keyword, hybrid
  searchType: text("search_type").notNull().default("semantic"),
  filters: jsonb("filters"),
  resultsCount: integer("results_count").default(0),
  // Search duration in seconds
  searchTime: real("search_time"),
  createdAt: timestamp("created_at").notNull().defaultNow(),
});
/**
 * `search_results` table: links a row in `search_queries` to a row in
 * `documents`, carrying the relevance score, snippet, and rank of that hit.
 */
export const searchResults = pgTable("search_results", {
  id: serial("id").primaryKey(),

  // Foreign keys
  queryId: integer("query_id")
    .notNull()
    .references(() => searchQueries.id),
  documentId: integer("document_id")
    .notNull()
    .references(() => documents.id),

  // Per-hit data
  relevanceScore: real("relevance_score").notNull(),
  snippet: text("snippet").notNull(),
  rank: integer("rank").notNull(),
});
/**
 * `citations` table: citation text attached to a row in `documents`,
 * with an optional page number and section.
 */
export const citations = pgTable("citations", {
  id: serial("id").primaryKey(),
  documentId: integer("document_id")
    .notNull()
    .references(() => documents.id),
  citationText: text("citation_text").notNull(),

  // Optional location of the citation
  pageNumber: integer("page_number"),
  section: text("section"),

  createdAt: timestamp("created_at").notNull().defaultNow(),
});
// Insert schemas
/**
 * Zod schema for inserting into `documents`, derived from the table
 * definition with `id` and `createdAt` removed.
 */
export const insertDocumentSchema = createInsertSchema(documents).omit({ id: true, createdAt: true });
/**
 * Zod schema for inserting into `search_queries`, derived from the table
 * definition with `id` and `createdAt` removed.
 */
export const insertSearchQuerySchema = createInsertSchema(searchQueries).omit({ id: true, createdAt: true });
/**
 * Zod schema for inserting into `search_results`, derived from the table
 * definition with `id` removed.
 */
export const insertSearchResultSchema = createInsertSchema(searchResults).omit({ id: true });
/**
 * Zod schema for inserting into `citations`, derived from the table
 * definition with `id` and `createdAt` removed.
 */
export const insertCitationSchema = createInsertSchema(citations).omit({ id: true, createdAt: true });
// Types
/** Row shape selected from `documents`. */
export type Document = typeof documents.$inferSelect;
/** Input accepted by `insertDocumentSchema`. */
export type InsertDocument = z.infer<typeof insertDocumentSchema>;

/** Row shape selected from `search_queries`. */
export type SearchQuery = typeof searchQueries.$inferSelect;
/** Input accepted by `insertSearchQuerySchema`. */
export type InsertSearchQuery = z.infer<typeof insertSearchQuerySchema>;

/** Row shape selected from `search_results`. */
export type SearchResult = typeof searchResults.$inferSelect;
/** Input accepted by `insertSearchResultSchema`. */
export type InsertSearchResult = z.infer<typeof insertSearchResultSchema>;

/** Row shape selected from `citations`. */
export type Citation = typeof citations.$inferSelect;
/** Input accepted by `insertCitationSchema`. */
export type InsertCitation = z.infer<typeof insertCitationSchema>;
// Search request/response types
/**
 * Validates a search request.
 *
 * - `query`: non-empty search text.
 * - `searchType`: "semantic", "keyword" or "hybrid"; defaults to "semantic".
 * - `filters`: optional; may carry a list of source types and a date range
 *   whose `start`/`end` are plain strings (their format is not validated here).
 * - `limit`: whole number from 1 to 50; defaults to 10.
 * - `offset`: whole number of at least 0; defaults to 0.
 */
export const searchRequestSchema = z.object({
  query: z.string().min(1),
  searchType: z.enum(["semantic", "keyword", "hybrid"]).default("semantic"),
  filters: z
    .object({
      sourceTypes: z.array(z.string()).optional(),
      dateRange: z
        .object({
          start: z.string().optional(),
          end: z.string().optional(),
        })
        .optional(),
    })
    .optional(),
  // Pagination values are counts, so fractional numbers (e.g. 2.5) are rejected.
  limit: z.number().int().min(1).max(50).default(10),
  offset: z.number().int().min(0).default(0),
});
/** Parsed shape of a search request, with defaults applied. */
export type SearchRequest = z.infer<typeof searchRequestSchema>;
/**
 * Payload describing the outcome of a search: the matched documents with
 * their per-hit data, plus summary fields for the query.
 */
export interface SearchResponse {
  /** Matched documents, each extended with its score, snippet, and rank. */
  results: (Document & {
    relevanceScore: number;
    snippet: string;
    rank: number;
  })[];
  totalCount: number;
  searchTime: number;
  query: string;
  queryId: number;
}
/**
 * A document row extended with per-hit search data and, optionally,
 * additional context passages.
 */
export interface DocumentWithContext extends Document {
  relevanceScore: number;
  snippet: string;
  rank: number;
  /** Optional extra passages, each with its section and an optional page number. */
  additionalContext?: {
    text: string;
    section: string;
    pageNumber?: number;
  }[];
}
// File upload schemas
/**
 * Validates the descriptor of an uploaded file.
 *
 * - `fileName`, `mimeType`: non-empty strings.
 * - `fileSize`: whole number of bytes, at least 1.
 * - `title`, `source`: optional strings.
 */
export const fileUploadSchema = z.object({
  fileName: z.string().min(1),
  // Byte counts are whole numbers, and `documents.file_size` is an integer
  // column, so fractional sizes are rejected here rather than at insert time.
  // NOTE(review): that column is a 32-bit integer; confirm whether an upper
  // bound should also be enforced.
  fileSize: z.number().int().min(1),
  mimeType: z.string().min(1),
  title: z.string().optional(),
  source: z.string().optional(),
});
/** Parsed shape of a file-upload descriptor. */
export type FileUpload = z.infer<typeof fileUploadSchema>;
// Document processing schemas
/**
 * Validates a request to process a single document.
 *
 * - `documentId`: positive whole number identifying the document.
 * - `operations`: any of "extract_text", "build_index", "generate_embedding";
 *   defaults to `["extract_text"]`.
 * - `indexName`: optional string.
 */
export const documentProcessingSchema = z.object({
  // `documents.id` is a serial column, so valid ids are positive integers;
  // fractional, zero and negative values are rejected up front.
  documentId: z.number().int().positive(),
  operations: z
    .array(z.enum(["extract_text", "build_index", "generate_embedding"]))
    .default(["extract_text"]),
  indexName: z.string().optional(),
});
/** Parsed shape of a single-document processing request, with defaults applied. */
export type DocumentProcessing = z.infer<typeof documentProcessingSchema>;
// Batch processing schemas
/**
 * Validates a request to process several documents at once.
 *
 * - `documentIds`: at least one positive whole-number document id.
 * - `operations`: any of "extract_text", "build_index", "generate_embedding";
 *   defaults to `["extract_text"]`.
 * - `indexName`: optional string.
 */
export const batchProcessingSchema = z.object({
  // `documents.id` is a serial column, so each id must be a positive integer;
  // fractional, zero and negative values are rejected up front.
  documentIds: z.array(z.number().int().positive()).min(1),
  operations: z
    .array(z.enum(["extract_text", "build_index", "generate_embedding"]))
    .default(["extract_text"]),
  indexName: z.string().optional(),
});
/** Parsed shape of a batch processing request, with defaults applied. */
export type BatchProcessing = z.infer<typeof batchProcessingSchema>;
|