Update src/RAGSample.py
src/RAGSample.py  CHANGED  (+43 -7)
@@ -317,6 +317,32 @@ def setup_retriever(use_kaggle_data: bool = False, kaggle_dataset: Optional[str]
     print("Retriever setup complete.")
     return vectorstore.as_retriever(k=4)
 
+# def setup_rag_chain() -> Runnable:
+#     """Sets up the RAG chain with a prompt template and an LLM."""
+#     # Define the prompt template for the LLM
+#     prompt = PromptTemplate(
+#         template="""You are an assistant for question-answering tasks.
+# Use the following documents to answer the question.
+# If you don't know the answer, just say that you don't know.
+# Use three sentences maximum and keep the answer concise:
+# Question: {question}
+# Documents: {documents}
+# Answer:
+# """,
+#         input_variables=["question", "documents"],
+#     )
+
+#     # Initialize the LLM with dolphin-llama3:8b model
+#     # Note: This requires the Ollama server to be running with the specified model
+#     llm = ChatOllama(
+#         model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
+#         temperature=0,
+#     )
+
+#     # Create a chain combining the prompt template and LLM
+#     return prompt | llm | StrOutputParser()
+
+
 def setup_rag_chain() -> Runnable:
     """Sets up the RAG chain with a prompt template and an LLM."""
     # Define the prompt template for the LLM
@@ -332,17 +358,27 @@ Answer:
         input_variables=["question", "documents"],
     )
 
-    # Initialize the LLM with dolphin-llama3:8b model
-    # Note: This requires the Ollama server to be running with the specified model
-    llm = ChatOllama(
-        model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
-        temperature=0,
-    )
-
+    # OPTION 1: Use Hugging Face Pipeline (Recommended for HF Spaces)
+    from transformers import pipeline
+    from langchain.llms import HuggingFacePipeline
+
+    # Initialize a local Hugging Face model
+    hf_pipeline = pipeline(
+        "text-generation",
+        model="microsoft/DialoGPT-medium",  # Good for Q&A tasks
+        tokenizer="microsoft/DialoGPT-medium",
+        max_length=512,
+        temperature=0.1,
+        device=0 if torch.cuda.is_available() else -1,
+        return_full_text=False
+    )
+
+    # Wrap it in LangChain
+    llm = HuggingFacePipeline(pipeline=hf_pipeline)
+
     # Create a chain combining the prompt template and LLM
     return prompt | llm | StrOutputParser()
 
 
 # Define the RAG application class
 class RAGApplication:
     def __init__(self, retriever: BaseRetriever, rag_chain: Runnable):
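
Note: the added hunk calls torch.cuda.is_available(), but no import torch appears in this diff; the import may already exist at the top of src/RAGSample.py, otherwise it has to be added. For reference, below is a minimal self-contained sketch of what the updated setup_rag_chain() appears to build. The prompt is abbreviated here, and the import paths are assumptions inferred from the names used in the diff (in recent LangChain releases HuggingFacePipeline is imported from langchain_community.llms rather than langchain.llms):

import torch
from transformers import pipeline
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import Runnable

def setup_rag_chain() -> Runnable:
    """Builds prompt -> local HF text-generation model -> string output."""
    # Abbreviated version of the prompt template used in the commit
    prompt = PromptTemplate(
        template="Question: {question}\nDocuments: {documents}\nAnswer:",
        input_variables=["question", "documents"],
    )

    # Local text-generation pipeline, placed on GPU when one is available
    hf_pipeline = pipeline(
        "text-generation",
        model="microsoft/DialoGPT-medium",
        tokenizer="microsoft/DialoGPT-medium",
        max_length=512,
        temperature=0.1,
        device=0 if torch.cuda.is_available() else -1,
        return_full_text=False,
    )
    llm = HuggingFacePipeline(pipeline=hf_pipeline)  # LangChain wrapper around the pipeline

    # Same LCEL composition as in the diff: prompt | llm | output parser
    return prompt | llm | StrOutputParser()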
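
The body of RAGApplication beyond __init__ is outside this diff. Purely as an illustration of how the retriever from setup_retriever() and the chain from setup_rag_chain() are typically combined, a hypothetical run method might look like this (the method name and its internals are assumptions, not code from the commit):

from langchain.schema import BaseRetriever
from langchain.schema.runnable import Runnable

class RAGApplication:
    def __init__(self, retriever: BaseRetriever, rag_chain: Runnable):
        self.retriever = retriever
        self.rag_chain = rag_chain

    def run(self, question: str) -> str:
        # Retrieve supporting documents, then let the chain answer from them
        docs = self.retriever.get_relevant_documents(question)
        doc_text = "\n".join(doc.page_content for doc in docs)
        return self.rag_chain.invoke({"question": question, "documents": doc_text})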