Spaces:
Running
Running
File size: 2,637 Bytes
7d587fb eb8aef0 f0a5e1c eb8aef0 51657ce 7d587fb eb8aef0 7d587fb fc6868b eb8aef0 ac6cd22 fc6868b ac6cd22 d458310 ac6cd22 fc6868b ac6cd22 fc6868b ac6cd22 fc6868b ac6cd22 fc6868b ac6cd22 7d587fb fc6868b 7d587fb fc6868b 7d587fb 15d6540 7d587fb eb8aef0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import gradio as gr
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient
# Embedding model used to vectorize queries (must match the model used to
# build the persisted store — presumably it was; TODO confirm).
embeddings = SentenceTransformerEmbeddings(model_name="msmarco-distilbert-base-v4")
# Re-open the persisted Chroma vector store from the "embeddings" directory.
db = Chroma(persist_directory="embeddings", embedding_function=embeddings)
# Hosted inference client for the Mixtral instruct model (remote API calls).
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")
def respond(
    message,
    history: list[tuple[str, str]],
):
    """Stream a retrieval-augmented answer to *message*.

    Rebuilds the chat history, retrieves similar documents from the Chroma
    store, wraps them (or a "no documents" fallback) into a Mixtral
    instruction prompt, and streams the model's reply token by token.

    Args:
        message: The user's current query text.
        history: Prior (user, assistant) message pairs from the chat UI.

    Yields:
        str: The accumulated response text after each streamed token.
    """
    messages = []
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    matching_docs = db.similarity_search(message)
    if not matching_docs:
        # No relevant documents: ask the model for a polite "not available" reply.
        prompt = (
            f"<s>[INST] You are an expert in generating responses when there is no information available. "
            f"Unfortunately, there are no relevant documents available to answer the following query:\n\n"
            f"Query: {message}\n\n"
            f"Please provide a polite and original response to inform the user that the requested information is not "
            f"available.[/INST]</s>"
        )
    else:
        # Concatenate every retrieved document into the prompt context.
        # (The original also tracked a running word count that was never
        # used — removed as dead code.)
        context = "".join(
            f"Document {i + 1}:\n{doc.page_content}\n\n"
            for i, doc in enumerate(matching_docs)
        )
        prompt = (
            f"<s>[INST] You are an expert in summarizing and answering questions based on given documents. "
            f"You're an expert in English grammar at the same time. "
            f"This means that your texts are flawless, correct and grammatically correct."
            f"Never write in the output response what document the response is in. It looks very unprofessional."
            f"Please provide a detailed and well-explained answer to the following query in 4-6 sentences:\n\n"
            f"Query: {message}\n\n"
            f"Based on the following documents:\n{context}\n\n"
            f"Answer:[/INST]</s>"
        )
    messages.append({"role": "user", "content": prompt})

    response = ""
    # Use a distinct loop variable: the original reused `message`, shadowing
    # the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=250,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        # delta.content may be None on some streamed chunks; skip those so
        # `response += token` cannot raise TypeError.
        if token:
            response += token
        yield response
# Wire the streaming responder into a Gradio chat UI; `respond` yields the
# growing answer so the interface updates incrementally.
demo = gr.ChatInterface(
    respond,
    title="Boost.space Docs LLM",
)
# Launch the web app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|