Update src/RAGSample.py

src/RAGSample.py  (+97 -45)  CHANGED
@@ -370,33 +370,62 @@ Answer:
         input_variables=["question", "documents"],
     )

- [27 removed lines: earlier tokenizer and pipeline setup]
+    try:
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT")
+        model = AutoModelForCausalLM.from_pretrained(
+            "microsoft/BioGPT",
+            device_map="auto",
+            torch_dtype=torch.float16
+        )
+
+        # Fix the tokenizer configuration properly
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        print(f"Tokenizer pad_token_id: {tokenizer.pad_token_id}")
+        print(f"Tokenizer eos_token_id: {tokenizer.eos_token_id}")
+
+        # Initialize pipeline with correct token IDs from tokenizer
+        hf_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=50,  # Start small for testing
+            temperature=0.2,
+            return_full_text=False,
+            do_sample=True,
+            # Use actual tokenizer token IDs, not hardcoded values
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            clean_up_tokenization_spaces=True
+        )
+
+        # Test the pipeline with a simple input
+        test_input = "What is diabetes?"
+        print(f"Testing pipeline with: {test_input}")
+        test_result = hf_pipeline(test_input)
+        print(f"Pipeline test successful: {test_result}")
+
+    except Exception as e:
+        print(f"Error setting up BioGPT: {e}")
+        print("Falling back to DistilGPT-2...")
+
+        # Fallback to a more stable model
+        hf_pipeline = pipeline(
+            "text-generation",
+            model="distilgpt2",
+            max_new_tokens=50,
+            temperature=0.2,
+            return_full_text=False,
+            do_sample=True,
+            clean_up_tokenization_spaces=True
+        )
+
+        # Test the fallback pipeline
+        test_input = "What is diabetes?"
+        print(f"Testing fallback pipeline with: {test_input}")
+        test_result = hf_pipeline(test_input)
+        print(f"Fallback pipeline test successful: {test_result}")

     # Wrap it in LangChain
     llm = HuggingFacePipeline(pipeline=hf_pipeline)
@@ -404,7 +433,8 @@ Answer:
     # Create a chain combining the prompt template and LLM
     return prompt | llm | StrOutputParser()

-
+
+# Also update the RAG application class with better error handling
 class RAGApplication:
     def __init__(self, retriever: BaseRetriever, rag_chain: Runnable):
         self.retriever = retriever
@@ -412,23 +442,45 @@ class RAGApplication:

     def run(self, question: str) -> str:
         """Runs the RAG pipeline for a given question."""
- [17 removed lines: earlier run() body]
+        try:
+            # Input validation
+            if not question or not question.strip():
+                return "Please provide a valid question."
+
+            question = question.strip()
+            print(f"\nProcessing question: '{question}'")
+
+            # Retrieve relevant documents
+            documents = self.retriever.invoke(question)
+
+            # Debug: Print retrieved documents
+            print(f"DEBUG: Retrieved {len(documents)} documents")
+            for i, doc in enumerate(documents):
+                print(f"DEBUG: Document {i+1}: {doc.page_content[:200]}...")
+
+            # Extract content from retrieved documents
+            doc_texts = "\n\n".join([doc.page_content for doc in documents])
+
+            # Limit the total input length to prevent token overflow
+            max_input_length = 500  # Conservative limit
+            if len(doc_texts) > max_input_length:
+                doc_texts = doc_texts[:max_input_length] + "..."
+                print(f"DEBUG: Truncated document text to {max_input_length} characters")
+
+            print(f"DEBUG: Combined document text length: {len(doc_texts)}")
+
+            # Get the answer from the language model
+            print("DEBUG: Calling language model...")
+            answer = self.rag_chain.invoke({"question": question, "documents": doc_texts})
+            print(f"DEBUG: Language model response: {answer}")
+
+            return answer
+
+        except Exception as e:
+            print(f"Error in RAG application: {str(e)}")
+            import traceback
+            traceback.print_exc()
+            return f"I apologize, but I encountered an error processing your question: {str(e)}. Please try rephrasing it or ask a different question."

 # Main execution block
 if __name__ == "__main__":