File size: 2,637 Bytes
7d587fb
eb8aef0
 
f0a5e1c
eb8aef0
 
 
 
51657ce
7d587fb
 
 
eb8aef0
 
7d587fb
fc6868b
 
 
 
 
 
 
 
eb8aef0
 
ac6cd22
 
fc6868b
ac6cd22
d458310
ac6cd22
fc6868b
ac6cd22
 
 
 
 
 
 
 
 
 
 
fc6868b
ac6cd22
 
fc6868b
ac6cd22
 
 
fc6868b
ac6cd22
7d587fb
fc6868b
 
 
 
 
 
 
 
 
 
 
 
7d587fb
fc6868b
 
7d587fb
 
 
 
15d6540
7d587fb
 
 
eb8aef0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient

# Embedding model used both to index and to query the vector store.
# NOTE(review): "msmarco-distilbert-base-v4" is tuned for passage retrieval —
# it must match the model used when the "embeddings" directory was built.
embeddings = SentenceTransformerEmbeddings(model_name="msmarco-distilbert-base-v4")
# Open the pre-built Chroma index persisted on disk under ./embeddings.
db = Chroma(persist_directory="embeddings", embedding_function=embeddings)

# Hosted inference endpoint used for answer generation (streaming chat completion).
client = InferenceClient(model="mistralai/Mixtral-8x7B-Instruct-v0.1")


def respond(
        message,
        history: list[tuple[str, str]],
):
    """Stream an answer to *message*, grounded in documents retrieved from Chroma.

    Generator used by ``gr.ChatInterface``: it replays the prior conversation,
    retrieves similar documents for the current query, builds a Mixtral
    instruction prompt (a fallback prompt when nothing matches), and yields the
    progressively accumulated response text as tokens stream in.

    Parameters:
        message: The user's current query string.
        history: Prior (user, assistant) turn pairs supplied by Gradio.

    Yields:
        The response text accumulated so far (one yield per streamed token).
    """
    messages = []

    # Replay earlier turns so the model sees the conversation context.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    matching_docs = db.similarity_search(message)

    if not matching_docs:
        # No retrieval hits: ask the model to politely say the info is unavailable.
        prompt = (
            f"<s>[INST] You are an expert in generating responses when there is no information available. "
            f"Unfortunately, there are no relevant documents available to answer the following query:\n\n"
            f"Query: {message}\n\n"
            f"Please provide a polite and original response to inform the user that the requested information is not "
            f"available.[/INST]</s>"
        )
    else:
        # Concatenate retrieved documents into a numbered context section.
        # (A previous version also tracked word counts here; that total was
        # never used, so it has been removed.)
        context = "".join(
            f"Document {i}:\n{doc.page_content}\n\n"
            for i, doc in enumerate(matching_docs, start=1)
        )

        prompt = (
            f"<s>[INST] You are an expert in summarizing and answering questions based on given documents. "
            f"You're an expert in English grammar at the same time. "
            f"This means that your texts are flawless, correct and grammatically correct."
            f"Never write in the output response what document the response is in. It looks very unprofessional."
            f"Please provide a detailed and well-explained answer to the following query in 4-6 sentences:\n\n"
            f"Query: {message}\n\n"
            f"Based on the following documents:\n{context}\n\n"
            f"Answer:[/INST]</s>"
        )

    messages.append({"role": "user", "content": prompt})

    response = ""

    # NOTE: the loop variable is deliberately NOT named `message` — the original
    # shadowed the function parameter here.
    for chunk in client.chat_completion(
            messages,
            max_tokens=250,
            stream=True,
            temperature=0.7,
            top_p=0.95,
    ):
        token = chunk.choices[0].delta.content

        # The final stream chunk may carry delta.content == None; skip it so
        # `response += token` cannot raise a TypeError.
        if token:
            response += token
            yield response


# Wire the streaming `respond` generator into a Gradio chat UI.
demo = gr.ChatInterface(
    respond,
    title="Boost.space Docs LLM",
)

# Launch the web app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()