SlouchyBuffalo committed
Commit 85e10d0 · verified · 1 Parent(s): dc6bbf5

Create app.py

Files changed (1): app.py +229 -0
app.py ADDED
@@ -0,0 +1,229 @@
import gradio as gr
import spaces
import os
import logging
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient, get_token

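# Note: the langchain.* import paths above assume an older pinned LangChain
# release; newer versions expose the same classes from the langchain_community
# package (e.g. langchain_community.vectorstores.Chroma).
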
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set HF_HOME so Hugging Face assets are cached in persistent storage
os.environ["HF_HOME"] = "/data/.huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)
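# Caveat: huggingface_hub resolves HF_HOME when it is imported, so setting the
# variable after the imports above may not redirect its cache; assigning it
# before the imports (or as a Space variable) is the safer ordering.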

# Define persistent storage directories
DATA_DIR = "/data"  # Root persistent storage directory
DOCS_DIR = os.path.join(DATA_DIR, "documents")  # Subdirectory for uploaded PDFs
CHROMA_DIR = os.path.join(DATA_DIR, "chroma_db")  # Subdirectory for the Chroma vector store

# Create directories if they don't exist
os.makedirs(DOCS_DIR, exist_ok=True)
os.makedirs(CHROMA_DIR, exist_ok=True)

# Initialize the Cerebras InferenceClient
try:
    token = get_token()
    if not token:
        logger.error("HF_TOKEN is not set in Space secrets")
        client = None
    else:
        client = InferenceClient(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            provider="cerebras",
            token=token
        )
        logger.info("InferenceClient initialized successfully")
except Exception as e:
    logger.error(f"Failed to initialize InferenceClient: {str(e)}")
    client = None

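# get_token() resolves the token from the HF_TOKEN env var / Space secret or a
# cached login, so the client only comes up when credentials are present.
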
# Global variables for the vector store
vectorstore = None
retriever = None

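# On ZeroGPU Spaces, @spaces.GPU requests a GPU slice only while the decorated
# function runs; the rest of the app stays on CPU.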
@spaces.GPU(duration=180)  # Use ZeroGPU (H200) for embedding generation, 180s timeout
def initialize_rag(file):
    global vectorstore, retriever
    try:
        # Debug file object properties
        logger.info(f"File object: {type(file)}, Attributes: {dir(file)}")
        logger.info(f"File name: {file.name}")

        # Validate file
        if not file or not file.name:
            logger.error("No file provided or invalid file name")
            return "Error: No file provided or invalid file name"

        # Verify the temporary file exists and is accessible
        if not os.path.exists(file.name):
            logger.error(f"Temporary file {file.name} does not exist")
            return f"Error: Temporary file {file.name} does not exist"

        # Check the temporary file size
        file_size = os.path.getsize(file.name)
        logger.info(f"Temporary file size: {file_size} bytes")
        if file_size == 0:
            logger.error("Uploaded file is empty")
            return "Error: Uploaded file is empty"

        # Save the uploaded file to persistent storage
        file_name = os.path.basename(file.name)
        file_path = os.path.join(DOCS_DIR, file_name)

        # Check whether the file already exists and its size
        should_save = True
        if os.path.exists(file_path):
            existing_size = os.path.getsize(file_path)
            logger.info(f"Existing file {file_name} size: {existing_size} bytes")
            if existing_size == 0:
                logger.warning(f"Existing file {file_name} is empty, will overwrite")
            else:
                logger.info(f"File {file_name} already exists and is not empty, skipping save")
                should_save = False

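        # Note: a same-named re-upload is skipped whenever a non-empty copy
        # already exists, so updating a changed PDF requires renaming it or
        # clearing /data/documents first.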
        if should_save:
            try:
                with open(file.name, "rb") as src_file:
                    file_content = src_file.read()
                logger.info(f"Read {len(file_content)} bytes from temporary file")
                if not file_content:
                    logger.error("File content is empty after reading")
                    return "Error: File content is empty after reading"
                with open(file_path, "wb") as dst_file:
                    dst_file.write(file_content)
                    dst_file.flush()  # Ensure the write completes
                # Verify the written file
                written_size = os.path.getsize(file_path)
                logger.info(f"Saved {file_name} to {file_path}, size: {written_size} bytes")
                if written_size == 0:
                    logger.error(f"Failed to write {file_name}, file is empty")
                    return f"Error: Failed to write {file_name}, file is empty"
            except PermissionError as e:
                logger.error(f"Permission error writing to {file_path}: {str(e)}")
                return f"Error: Permission denied writing to {file_path}"
            except Exception as e:
                logger.error(f"Error writing file to {file_path}: {str(e)}")
                return f"Error writing file: {str(e)}"

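        # Gradio stages uploads as temporary files; copying the bytes into
        # DOCS_DIR keeps the PDF after the temp file is cleaned up.
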
        # Load and split the document
        try:
            loader = PyPDFLoader(file_path)
            documents = loader.load()
            if not documents:
                logger.error("No content loaded from PDF")
                return "Error: No content loaded from PDF"
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            texts = text_splitter.split_documents(documents)
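            # 1000-character chunks with 200 characters of overlap keep each
            # retrieved passage self-contained without losing boundary context.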
        except Exception as e:
            logger.error(f"Error loading PDF: {str(e)}")
            return f"Error loading PDF: {str(e)}"

        # Create or update embeddings and the vector store
        try:
            logger.info("Initializing HuggingFaceEmbeddings")
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            logger.info("Creating Chroma vector store")
            vectorstore = Chroma.from_documents(
                texts, embeddings, persist_directory=CHROMA_DIR
            )
            vectorstore.persist()  # Save to persistent storage
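            # (With chromadb >= 0.4 the store persists automatically and this
            # .persist() call may be a no-op or unavailable; it is kept here
            # for older releases.)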
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            logger.info(f"Vector store created and persisted to {CHROMA_DIR}")
            return f"Document '{file_name}' processed and saved to {DOCS_DIR}!"
        except Exception as e:
            logger.error(f"Error in embeddings or Chroma: {str(e)}")
            return f"Error processing embeddings: {str(e)}"
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        return f"Error processing document: {str(e)}"

def query_documents(query, history, system_prompt, max_tokens, temperature):
    global retriever, client
    try:
        if client is None:
            logger.error("InferenceClient not initialized")
            return history, "Error: InferenceClient not initialized. Check HF_TOKEN."
        if retriever is None:
            logger.error("No documents loaded")
            return history, "Error: No documents loaded. Please upload a document first."

        # Ensure history is a list of [user, assistant] lists
        logger.info(f"History before processing: {history}")
        if not isinstance(history, list):
            logger.warning("History is not a list, resetting")
            history = []
        history = [[str(item[0]), str(item[1])] for item in history
                   if isinstance(item, (list, tuple)) and len(item) == 2]

        # Retrieve relevant documents
        docs = retriever.get_relevant_documents(query)
        context = "\n".join([doc.page_content for doc in docs])

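        # The top-k chunks are concatenated into the prompt as context
        # (retrieve-then-read RAG); k=3 is set on the retriever above.
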
        # Call Cerebras inference
        logger.info("Calling Cerebras inference")
        response = client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context: {context}\n\nQuery: {query}"}
            ],
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            stream=False
        )
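        # stream=False returns a single complete response object; chat_completion
        # also supports stream=True when incremental token display is wanted.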
        answer = response.choices[0].message.content
        logger.info("Inference successful")

        # Update chat history in [user, assistant] list format
        history.append([query, answer])
        logger.info(f"History after append: {history}")
        return history, ""  # empty status string on success
    except Exception as e:
        logger.error(f"Error querying documents: {str(e)}")
        return history, f"Error querying documents: {str(e)}"

# Load an existing vector store on startup
try:
    # CHROMA_DIR is always created above, so also require it to be non-empty;
    # an empty directory would otherwise set a retriever over no documents.
    if os.path.exists(CHROMA_DIR) and os.listdir(CHROMA_DIR):
        logger.info("Loading existing vector store")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        logger.info(f"Loaded vector store from {CHROMA_DIR}")
except Exception as e:
    logger.error(f"Error loading vector store: {str(e)}")

with gr.Blocks() as demo:
    gr.Markdown("# RAG chatbot w/ persistent storage (works best with CPU Upgrade)")

    # File upload
    file_input = gr.File(label="Upload Document (PDF)", file_types=[".pdf"])
    file_output = gr.Textbox(label="Upload Status")
    file_input.upload(initialize_rag, file_input, file_output)

    # Chat interface
    chatbot = gr.Chatbot(label="Conversation")

    # Query and parameters
    with gr.Row():
        query_input = gr.Textbox(label="Query", placeholder="Ask about the document...")
        system_prompt = gr.Textbox(
            label="System Prompt",
            value="You are a helpful assistant answering questions based on the provided document context."
        )
        max_tokens = gr.Slider(label="Max Tokens", minimum=50, maximum=2000, value=500, step=50)
        temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.1)

    # Submit button and query status display
    submit_btn = gr.Button("Send")
    query_status = gr.Textbox(label="Query Status")
    submit_btn.click(
        query_documents,
        inputs=[query_input, chatbot, system_prompt, max_tokens, temperature],
        # Route outputs to the existing components; passing freshly created
        # gr.Chatbot()/gr.Textbox() instances here would render duplicate
        # widgets and leave the conversation above unchanged.
        outputs=[chatbot, query_status]
    )

if __name__ == "__main__":
    demo.launch()