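"""RAG chatbot for Hugging Face Spaces with persistent storage.

Uploads a PDF, splits it into chunks, embeds the chunks with a
sentence-transformers model, stores them in a Chroma vector store under /data,
and answers queries with a Cerebras-hosted Llama model via the Hugging Face
InferenceClient. Likely runtime dependencies (assumed here, not pinned in this
file): gradio, spaces, langchain, chromadb, pypdf, sentence-transformers,
huggingface_hub.
"""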
import gradio as gr
import spaces
import os
import logging
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient, get_token

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set HF_HOME for caching Hugging Face assets in persistent storage
os.environ["HF_HOME"] = "/data/.huggingface"
os.makedirs(os.environ["HF_HOME"], exist_ok=True)

# Define persistent storage directories
DATA_DIR = "/data"  # Root persistent storage directory
DOCS_DIR = os.path.join(DATA_DIR, "documents")  # Subdirectory for uploaded PDFs
CHROMA_DIR = os.path.join(DATA_DIR, "chroma_db")  # Subdirectory for Chroma vector store

# Create directories if they don't exist
os.makedirs(DOCS_DIR, exist_ok=True)
os.makedirs(CHROMA_DIR, exist_ok=True)

# Initialize Cerebras InferenceClient
try:
    token = get_token()
    if not token:
        logger.error("HF_TOKEN is not set in Space secrets")
        client = None
    else:
        client = InferenceClient(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            provider="cerebras",
            token=token
        )
        logger.info("InferenceClient initialized successfully")
except Exception as e:
    logger.error(f"Failed to initialize InferenceClient: {str(e)}")
    client = None

# Global variables for vector store
vectorstore = None
retriever = None
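
# These globals are populated either by initialize_rag() after a file upload
# or by the startup loader near the bottom of this file, which reopens an
# existing Chroma store persisted from a previous session.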

@spaces.GPU(duration=180)  # Use ZeroGPU (H200) for embedding generation, 180s timeout
def initialize_rag(file):
    global vectorstore, retriever
    try:
        # Debug file object properties
        logger.info(f"File object: {type(file)}, Attributes: {dir(file)}")
        logger.info(f"File name: {file.name}")

        # Validate file
        if not file or not file.name:
            logger.error("No file provided or invalid file name")
            return "Error: No file provided or invalid file name"
        
        # Verify temporary file exists and is accessible
        if not os.path.exists(file.name):
            logger.error(f"Temporary file {file.name} does not exist")
            return f"Error: Temporary file {file.name} does not exist"
        
        # Check temporary file size
        file_size = os.path.getsize(file.name)
        logger.info(f"Temporary file size: {file_size} bytes")
        if file_size == 0:
            logger.error("Uploaded file is empty")
            return "Error: Uploaded file is empty"

        # Save uploaded file to persistent storage
        file_name = os.path.basename(file.name)
        file_path = os.path.join(DOCS_DIR, file_name)
        
        # Check if file exists and its size
        should_save = True
        if os.path.exists(file_path):
            existing_size = os.path.getsize(file_path)
            logger.info(f"Existing file {file_name} size: {existing_size} bytes")
            if existing_size == 0:
                logger.warning(f"Existing file {file_name} is empty, will overwrite")
            else:
                logger.info(f"File {file_name} already exists and is not empty, skipping save")
                should_save = False

        if should_save:
            try:
                with open(file.name, "rb") as src_file:
                    file_content = src_file.read()
                logger.info(f"Read {len(file_content)} bytes from temporary file")
                if not file_content:
                    logger.error("File content is empty after reading")
                    return "Error: File content is empty after reading"
                with open(file_path, "wb") as dst_file:
                    dst_file.write(file_content)
                    dst_file.flush()  # Ensure write completes
                # Verify written file
                written_size = os.path.getsize(file_path)
                logger.info(f"Saved {file_name} to {file_path}, size: {written_size} bytes")
                if written_size == 0:
                    logger.error(f"Failed to write {file_name}, file is empty")
                    return f"Error: Failed to write {file_name}, file is empty"
            except PermissionError as e:
                logger.error(f"Permission error writing to {file_path}: {str(e)}")
                return f"Error: Permission denied writing to {file_path}"
            except Exception as e:
                logger.error(f"Error writing file to {file_path}: {str(e)}")
                return f"Error writing file: {str(e)}"

        # Load and split document
        try:
            loader = PyPDFLoader(file_path)
            documents = loader.load()
            if not documents:
                logger.error("No content loaded from PDF")
                return "Error: No content loaded from PDF"
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            texts = text_splitter.split_documents(documents)
        except Exception as e:
            logger.error(f"Error loading PDF: {str(e)}")
            return f"Error loading PDF: {str(e)}"

        # Create or update embeddings and vector store
        try:
            logger.info("Initializing HuggingFaceEmbeddings")
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            logger.info("Creating Chroma vector store")
            vectorstore = Chroma.from_documents(
                texts, embeddings, persist_directory=CHROMA_DIR
            )
            vectorstore.persist()  # Save to persistent storage
            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
            logger.info(f"Vector store created and persisted to {CHROMA_DIR}")
            return f"Document '{file_name}' processed and saved to {DOCS_DIR}!"
        except Exception as e:
            logger.error(f"Error in embeddings or Chroma: {str(e)}")
            return f"Error processing embeddings: {str(e)}"
    except Exception as e:
        logger.error(f"Error processing document: {str(e)}")
        return f"Error processing document: {str(e)}"

def query_documents(query, history, system_prompt, max_tokens, temperature):
    global retriever, client
    try:
        if client is None:
            logger.error("InferenceClient not initialized")
            return history, "Error: InferenceClient not initialized. Check HF_TOKEN."
        if retriever is None:
            logger.error("No documents loaded")
            return history, "Error: No documents loaded. Please upload a document first."

        # Ensure history is a list of [user, assistant] lists
        logger.info(f"History before processing: {history}")
        if not isinstance(history, list):
            logger.warning("History is not a list, resetting")
            history = []
        history = [[str(item[0]), str(item[1])] for item in history if isinstance(item, (list, tuple)) and len(item) == 2]

        # Retrieve relevant documents
        docs = retriever.get_relevant_documents(query)
        context = "\n".join([doc.page_content for doc in docs])

        # Call Cerebras inference
        logger.info("Calling Cerebras inference")
        response = client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context: {context}\n\nQuery: {query}"}
            ],
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            stream=False
        )
        answer = response.choices[0].message.content
        logger.info("Inference successful")

        # Update chat history with list format
        history.append([query, answer])
        logger.info(f"History after append: {history}")
        return history, ""  # empty second value clears the status textbox on success
    except Exception as e:
        logger.error(f"Error querying documents: {str(e)}")
        return history, f"Error querying documents: {str(e)}"

# Load existing vector store on startup
try:
    if os.path.exists(CHROMA_DIR):
        logger.info("Loading existing vector store")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = Chroma(persist_directory=CHROMA_DIR, embedding_function=embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        logger.info(f"Loaded vector store from {CHROMA_DIR}")
except Exception as e:
    logger.error(f"Error loading vector store: {str(e)}")

with gr.Blocks() as demo:
    gr.Markdown("# RAG chatbot w/persistent storage (works best with CPU Upgrade)")

    # File upload
    file_input = gr.File(label="Upload Document (PDF)", file_types=[".pdf"])
    file_output = gr.Textbox(label="Upload Status")
    file_input.upload(initialize_rag, file_input, file_output)

    # Chat interface
    chatbot = gr.Chatbot(label="Conversation")

    # Query and parameters
    with gr.Row():
        query_input = gr.Textbox(label="Query", placeholder="Ask about the document...")
        system_prompt = gr.Textbox(
            label="System Prompt",
            value="You are a helpful assistant answering questions based on the provided document context."
        )
        max_tokens = gr.Slider(label="Max Tokens", minimum=50, maximum=2000, value=500, step=50)
        temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.1)

    # Status/error display for query results
    status_output = gr.Textbox(label="Query Status")

    # Submit button
    submit_btn = gr.Button("Send")
    submit_btn.click(
        query_documents,
        inputs=[query_input, chatbot, system_prompt, max_tokens, temperature],
        outputs=[chatbot, status_output]
    )

if __name__ == "__main__":
    demo.launch()