# import gradio as gr
# from huggingface_hub import InferenceClient

# """
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# """
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


# def respond(
#     message,
#     history: list[tuple[str, str]],
#     system_message,
#     max_tokens,
#     temperature,
#     top_p,
# ):
#     messages = [{"role": "system", "content": system_message}]

#     for val in history:
#         if val[0]:
#             messages.append({"role": "user", "content": val[0]})
#         if val[1]:
#             messages.append({"role": "assistant", "content": val[1]})

#     messages.append({"role": "user", "content": message})

#     response = ""

#     for message in client.chat_completion(
#         messages,
#         max_tokens=max_tokens,
#         stream=True,
#         temperature=temperature,
#         top_p=top_p,
#     ):
#         token = message.choices[0].delta.content

#         response += token
#         yield response


# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )


# if __name__ == "__main__":
#     demo.launch()

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
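
# Assumed runtime dependencies: torch, transformers, gradio, and peft (peft is
# needed for load_adapter below). On a Hugging Face Space these would be listed
# in requirements.txt.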

# Define model names
# NOTE: load_adapter() expects a directory (or Hub repo id) containing
# adapter_config.json and adapter_model.safetensors, not the weights file itself.
MODEL_1_PATH = "."  # Your fine-tuned PEFT adapter (adjust if the adapter files live elsewhere)
MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub

# Load the tokenizer (same for both models)
TOKENIZER_NAME = "sarvamai/sarvam-1"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
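
# Some causal-LM tokenizers ship without a pad token; falling back to EOS is a
# defensive assumption so generate() below always has a valid pad_token_id
# (the sarvam-1 tokenizer may already define one, in which case this is a no-op).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token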

# Keep the model and the inputs on the same device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Function to load a model
def load_model(model_choice):
    if model_choice == "Hugging face dataset":
        model = AutoModelForCausalLM.from_pretrained(TOKENIZER_NAME)
        model.load_adapter(MODEL_1_PATH)  # Attach the fine-tuned adapter (requires the `peft` package)
    else:
        model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
    model.to(device)
    model.eval()
    return model

# Load the default model on startup and remember which choice it belongs to
current_model_choice = "Hugging face dataset"
current_model = load_model(current_model_choice)

# Chatbot response function
def respond(message, history, model_choice, max_tokens, temperature, top_p):
    global current_model, current_model_choice

    # Switch models if the user selects a different one. The selected choice is
    # tracked explicitly, because config.name_or_path points at the same base
    # model in both cases and cannot distinguish them.
    if model_choice != current_model_choice:
        current_model = load_model(model_choice)
        current_model_choice = model_choice

    # Convert the (user, assistant) chat history into chat-message format
    messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    # Render the chat template and tokenize; add_generation_prompt=True makes
    # the model continue as the assistant instead of echoing the conversation
    inputs = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_tokens = tokenizer(inputs, return_tensors="pt").to(device)

    # Sampling must be enabled for temperature/top_p to have any effect
    with torch.no_grad():
        output_tokens = current_model.generate(
            **input_tokens,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens, not the prompt that was fed in
    new_tokens = output_tokens[0][input_tokens["input_ids"].shape[1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response

# Define Gradio Chat Interface
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Hugging face dataset", label="Select Model"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)
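
# ChatInterface passes the additional_inputs to respond() positionally, after
# (message, history), in the order they are listed above.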

if __name__ == "__main__":
    demo.launch()
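
# Quick sanity check without the UI (a sketch, assuming the adapter files and
# enough memory for sarvam-1 are available); run this instead of demo.launch():
#
#     print(respond("Hello!", [], "Hugging face dataset",
#                   max_tokens=64, temperature=0.7, top_p=0.95))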