import os
import sys

# Workaround for the outdated sqlite3 bundled with HF Spaces: swap in pysqlite3
# before chromadb (pulled in by the Chroma vectorstore) is loaded.
__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

from typing import Any, List, Optional

import gradio as gr
from huggingface_hub import InferenceClient
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# 📄 Load documents
docs = []
for f in os.listdir("multiple_docs"):
    path = os.path.join("multiple_docs", f)
    if f.endswith(".pdf"):
        loader = PyPDFLoader(path)
        docs.extend(loader.load())
    elif f.endswith(".docx") or f.endswith(".doc"):
        loader = Docx2txtLoader(path)
        docs.extend(loader.load())
    elif f.endswith(".txt"):
        loader = TextLoader(path)
        docs.extend(loader.load())

# 🔗 Split into chunks
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)
texts = [doc.page_content for doc in docs]
metadatas = [{"id": i} for i in range(len(texts))]

# 🧠 Embeddings
embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 🗃️ Vectorstore
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=embedding_function,
)
vectorstore.add_texts(texts=texts, metadatas=metadatas)
vectorstore.persist()

# 🔐 Get HF token from env variable
HF_API_KEY = os.getenv("HF_API_KEY")
if HF_API_KEY is None:
    raise ValueError("HF_API_KEY environment variable is not set.")

HF_MODEL = "deepseek-ai/deepseek-llm-7b-instruct"  # or any other hosted model

# 🤖 Create InferenceClient bound to the model
client = InferenceClient(model=HF_MODEL, token=HF_API_KEY)


# 🔷 Wrap the HF client in the LangChain LLM interface
class HuggingFaceInferenceLLM(LLM):
    """LLM that queries the Hugging Face Inference API."""

    # Typed as Any so pydantic does not try to validate the InferenceClient instance.
    client: Any = client

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        # Note: `stop` sequences are not forwarded to the API in this simple wrapper.
        return self.client.text_generation(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
        )

    @property
    def _llm_type(self) -> str:
        return "huggingface_inference_api"


llm = HuggingFaceInferenceLLM()

# 🔗 Conversational retrieval chain over the vectorstore
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    return_source_documents=True,
    verbose=False,
)

# 💬 Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Hello, I'm Thierry Decae's chatbot. Ask me about my experience, skills, eligibility, etc.")],
        avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"],
    )
    msg = gr.Textbox(placeholder="Type your question here...")
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # The Chatbot component stores history as (user, bot) pairs.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        result = chain({"question": query, "chat_history": chat_history_tuples})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)  # share=True is not needed when running in HF Spaces
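
# ─────────────────────────────────────────────────────────────────────────────
# Rough sketch of the packages this script assumes are installed (e.g. via the
# Space's requirements.txt). The names below are inferred from the imports
# above; exact versions are not pinned by the original script:
#
#   gradio
#   huggingface_hub
#   langchain              # an older release that still exposes langchain.document_loaders, etc.
#   chromadb
#   pysqlite3-binary       # provides the pysqlite3 module used in the sqlite3 workaround
#   sentence-transformers  # backs HuggingFaceEmbeddings
#   pypdf                  # backs PyPDFLoader
#   docx2txt               # backs Docx2txtLoader
# ─────────────────────────────────────────────────────────────────────────────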