Priyanka6 committed on
Commit
c17980d
·
1 Parent(s): 95936d3

Update space

Files changed (1)
  1. app.py +59 -144
app.py CHANGED
@@ -1,141 +1,65 @@
- # # import gradio as gr
- # # from huggingface_hub import InferenceClient
-
- # # """
- # # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- # # """
- # # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- # # def respond(
- # #     message,
- # #     history: list[tuple[str, str]],
- # #     system_message,
- # #     max_tokens,
- # #     temperature,
- # #     top_p,
- # # ):
- # #     messages = [{"role": "system", "content": system_message}]
-
- # #     for val in history:
- # #         if val[0]:
- # #             messages.append({"role": "user", "content": val[0]})
- # #         if val[1]:
- # #             messages.append({"role": "assistant", "content": val[1]})
-
- # #     messages.append({"role": "user", "content": message})
-
- # #     response = ""
-
- # #     for message in client.chat_completion(
- # #         messages,
- # #         max_tokens=max_tokens,
- # #         stream=True,
- # #         temperature=temperature,
- # #         top_p=top_p,
- # #     ):
- # #         token = message.choices[0].delta.content
-
- # #         response += token
- # #         yield response
-
-
- # # """
- # # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # # """
- # # demo = gr.ChatInterface(
- # #     respond,
- # #     additional_inputs=[
- # #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- # #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
- # #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- # #         gr.Slider(
- # #             minimum=0.1,
- # #             maximum=1.0,
- # #             value=0.95,
- # #             step=0.05,
- # #             label="Top-p (nucleus sampling)",
- # #         ),
- # #     ],
- # # )
-
-
- # # if __name__ == "__main__":
- # #     demo.launch()

  import torch
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
- import os
- from safetensors.torch import load_file, save_file

- # Define model names
- # MODEL_1_PATH = "./adapter_model.safetensors"  # Local path inside Space
- ###
- MODEL_1_PATH = "Priyanka6/fine-tuning-inference"
- ###
- MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub
- # MODEL_3_NAME =
-
- def trim_adapter_weights(model_path):
-     """
-     Trims the last token from the adapter's lm_head.lora_B.default.weight
-     if there is a mismatch with the base model.
-     """
-     model_path = "./adapter_model.safetensors"
-     # if not os.path.exists(model_path):
-     #     raise FileNotFoundError(f"Adapter file not found: {model_path}")
-
-     checkpoint = load_file(model_path)
-     print("Keys in checkpoint:", list(checkpoint.keys()))
-
-     key_to_trim = "lm_head.lora_B.default.weight"
-
-     if key_to_trim in checkpoint:
-         print("Entered")
-         original_size = checkpoint[key_to_trim].shape[0]
-         expected_size = original_size - 1  # Removing last token
-
-         print(f"Trimming {key_to_trim}: {original_size} -> {expected_size}")
-
-         checkpoint[key_to_trim] = checkpoint[key_to_trim][:-1]  # Trim the last row
-
-         # Save the modified adapter
-         trimmed_adapter_path = os.path.join(model_path, "adapter_model_trimmed.safetensors")
-         save_file(checkpoint, trimmed_adapter_path)
-         return trimmed_adapter_path
-     print("didn't execute the if block!")
-     return model_path
- model_path = os.path.join(MODEL_1_PATH, "adapter_model.safetensors")
- trimmed_adapter_path = trim_adapter_weights(model_path)
-
- # Load the tokenizer (same for both models)
- TOKENIZER_NAME = "sarvamai/sarvam-1"
- tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-
- # Function to load a model
- def load_model(model_choice):
-     if model_choice == "Hugging face dataset":
-         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
-         trimmed_adapter_path = os.path.join("Priyanka6/fine-tuning-inference", "adapter_model_trimmed.safetensors")
-         model.load_adapter(trimmed_adapter_path, "safe_tensors")  # Load safetensors adapter
-     else:
-         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
-     model.eval()
-     return model
-
- # Load default model on startup
- current_model = load_model("Hugging face dataset")
-
- # Chatbot response function
- def respond(message, history, model_choice, max_tokens, temperature, top_p):
-     global current_model
-
-     # Switch model if user selects a different one
-     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
-        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
-         current_model = load_model(model_choice)

      # Convert chat history to format
      messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
      for val in history:
@@ -149,7 +73,7 @@ def respond(message, history, model_choice, max_tokens, temperature, top_p):
      inputs = tokenizer.apply_chat_template(messages, tokenize=False)
      input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

-     output_tokens = current_model.generate(
          **input_tokens,
          max_new_tokens=max_tokens,
          temperature=temperature,
@@ -165,22 +89,13 @@ def respond(message, history, model_choice, max_tokens, temperature, top_p):
  demo = gr.ChatInterface(
      fn=respond,
      additional_inputs=[
-         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
          gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
          gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
      ],
  )

  if __name__ == "__main__":
-     demo.launch()
-
-
- # # Test the chatbot
- # if __name__ == "__main__":
- #     while True:
- #         query = input("User: ")
- #         if query.lower() in ["exit", "quit"]:
- #             break
- #         response = chat(query)
- #         print(f"Bot: {response}")
 
+ # import torch
+ # import gradio as gr
+ # from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # # Load the model and tokenizer
+ # MODEL_NAME = "sarvamai/sarvam-1"
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+ # model.eval()
+
+ # def respond(message, history, max_tokens, temperature, top_p):
+ #     # Convert chat history to format
+ #     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
+ #     for val in history:
+ #         if val[0]:
+ #             messages.append({"role": "user", "content": val[0]})
+ #         if val[1]:
+ #             messages.append({"role": "assistant", "content": val[1]})
+ #     messages.append({"role": "user", "content": message})
+
+ #     # Tokenize and generate response
+ #     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
+ #     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+
+ #     output_tokens = model.generate(
+ #         **input_tokens,
+ #         max_new_tokens=max_tokens,
+ #         temperature=temperature,
+ #         top_p=top_p,
+ #         pad_token_id=tokenizer.pad_token_id,
+ #         eos_token_id=tokenizer.eos_token_id,
+ #     )
+
+ #     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+ #     return response
+
+ # # Define Gradio Chat Interface
+ # demo = gr.ChatInterface(
+ #     fn=respond,
+ #     additional_inputs=[
+ #         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
+ #         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+ #         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+ #     ],
+ #     title="Sarvam-1 Chat Interface",
+ #     description="Chat with the Sarvam-1 language model"
+ # )
+
+ # if __name__ == "__main__":
+ #     demo.launch()

  import torch
  import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer

+ # Load the model and tokenizer
+ MODEL_NAME = "sarvamai/sarvam-1"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+ model.eval()
+ def respond(message, history, max_tokens, temperature, top_p):
      # Convert chat history to format
      messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
      for val in history:

      inputs = tokenizer.apply_chat_template(messages, tokenize=False)
      input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

+     output_tokens = model.generate(
          **input_tokens,
          max_new_tokens=max_tokens,
          temperature=temperature,

  demo = gr.ChatInterface(
      fn=respond,
      additional_inputs=[
          gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
          gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
      ],
+     title="Sarvam-1 Chat Interface",
+     description="Chat with the Sarvam-1 language model"
  )

  if __name__ == "__main__":
+     demo.launch()
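
For reference, the new app.py that this commit leaves behind, pieced together from the added and context lines above (the commented-out copy at the top of the new file spells out the same function bodies), amounts to roughly the sketch below. It adds nothing beyond what the diff shows, but two caveats apply: apply_chat_template assumes the sarvamai/sarvam-1 tokenizer defines a chat template, and generate() is never passed do_sample=True, so the Temperature and Top-p sliders have no effect under the default greedy decoding.

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model and tokenizer once at startup
MODEL_NAME = "sarvamai/sarvam-1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
model.eval()

def respond(message, history, max_tokens, temperature, top_p):
    # Convert the Gradio chat history into chat-template messages
    messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    # Tokenize and generate a response (assumes the tokenizer ships a chat template)
    inputs = tokenizer.apply_chat_template(messages, tokenize=False)
    input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    output_tokens = model.generate(
        **input_tokens,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    return response

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    title="Sarvam-1 Chat Interface",
    description="Chat with the Sarvam-1 language model",
)

if __name__ == "__main__":
    demo.launch()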