Priyanka6 committed on
Commit
ec6e229
·
1 Parent(s): 006aba1

Update space

Files changed (1)
  1. app.py +198 -131
app.py CHANGED
@@ -1,161 +1,228 @@
  # import gradio as gr
- # from huggingface_hub import InferenceClient
-
- # """
- # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- # """
- # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- # def respond(
- #     message,
- #     history: list[tuple[str, str]],
- #     system_message,
- #     max_tokens,
- #     temperature,
- #     top_p,
- # ):
- #     messages = [{"role": "system", "content": system_message}]

  #     for val in history:
  #         if val[0]:
  #             messages.append({"role": "user", "content": val[0]})
  #         if val[1]:
  #             messages.append({"role": "assistant", "content": val[1]})
-
  #     messages.append({"role": "user", "content": message})

- #     response = ""

- #     for message in client.chat_completion(
- #         messages,
- #         max_tokens=max_tokens,
- #         stream=True,
  #         temperature=temperature,
  #         top_p=top_p,
- #     ):
- #         token = message.choices[0].delta.content

- #         response += token
- #         yield response

-
- # """
- # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- # """
  # demo = gr.ChatInterface(
- #     respond,
  #     additional_inputs=[
- #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
- #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(
- #             minimum=0.1,
- #             maximum=1.0,
- #             value=0.95,
- #             step=0.05,
- #             label="Top-p (nucleus sampling)",
- #         ),
  #     ],
  # )

-
  # if __name__ == "__main__":
  #     demo.launch()

  import torch
- import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import os

- # Define model names
- MODEL_1_PATH = "./adapter_model.safetensors"  # Local path inside Space
- MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub

- # Load the tokenizer (same for both models)
- TOKENIZER_NAME = "sarvamai/sarvam-1"
- tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
- def fix_checkpoint(model_path):
-     """Fixes the model checkpoint by adjusting mismatched weight dimensions."""
-     checkpoint_file = os.path.join(model_path, "pytorch_model.bin")
-     fixed_checkpoint_file = os.path.join(model_path, "pytorch_model_fixed.bin")
-
-     if not os.path.exists(checkpoint_file):
-         raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_file}")
-
-     print("Loading checkpoint for fixing...")
-     checkpoint = torch.load(checkpoint_file, map_location="cpu")
-
-     # Adjust weights (truncate the last token if mismatch)
-     if "base_model.model.lm_head.base_layer.weight" in checkpoint:
-         checkpoint["base_model.model.lm_head.base_layer.weight"] = checkpoint["base_model.model.lm_head.base_layer.weight"][:-1]
-
-     if "base_model.model.lm_head.lora_B.default.weight" in checkpoint:
-         checkpoint["base_model.model.lm_head.lora_B.default.weight"] = checkpoint["base_model.model.lm_head.lora_B.default.weight"][:-1]
-
-     # Save the fixed checkpoint
-     print("Saving fixed checkpoint...")
-     torch.save(checkpoint, fixed_checkpoint_file)
-
-     return fixed_checkpoint_file  # Return the new file path
-
- # Function to load a model
- def load_model(model_choice):
-     if model_choice == "Hugging face dataset":
-         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
-         model.load_adapter(MODEL_1_PATH, "safe_tensors")  # Load safetensors adapter
-     else:
-         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
-     model.eval()
-     return model
-
- # Load default model on startup
- current_model = load_model("Hugging face dataset")
-
- # Chatbot response function
- def respond(message, history, model_choice, max_tokens, temperature, top_p):
-     global current_model

-     # Switch model if user selects a different one
-     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
-        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
-         current_model = load_model(model_choice)
-
-     # Convert chat history to format
-     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
-     # for val in history:
-     #     if val[0]:
-     #         messages.append({"role": "user", "content": val[0]})
-     #     if val[1]:
-     #         messages.append({"role": "assistant", "content": val[1]})
-     messages.append({"role": "user", "content": message})
-
-     # Tokenize and generate response
-     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
-     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-
-     output_tokens = current_model.generate(
-         **input_tokens,
-         max_new_tokens=max_tokens,
-         # temperature=temperature,
-         # top_p=top_p,
-         pad_token_id=tokenizer.pad_token_id,
-         eos_token_id=tokenizer.eos_token_id,
-     )
-
-     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-     return response
-
- # Define Gradio Chat Interface
- demo = gr.ChatInterface(
-     fn=respond,
-     additional_inputs=[
-         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
-         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
-         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-     ],
  )

  if __name__ == "__main__":
-     demo.launch()

+ # # import gradio as gr
+ # # from huggingface_hub import InferenceClient
+
+ # # """
+ # # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ # # """
+ # # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+
+ # # def respond(
+ # #     message,
+ # #     history: list[tuple[str, str]],
+ # #     system_message,
+ # #     max_tokens,
+ # #     temperature,
+ # #     top_p,
+ # # ):
+ # #     messages = [{"role": "system", "content": system_message}]
+
+ # #     for val in history:
+ # #         if val[0]:
+ # #             messages.append({"role": "user", "content": val[0]})
+ # #         if val[1]:
+ # #             messages.append({"role": "assistant", "content": val[1]})
+
+ # #     messages.append({"role": "user", "content": message})
+
+ # #     response = ""
+
+ # #     for message in client.chat_completion(
+ # #         messages,
+ # #         max_tokens=max_tokens,
+ # #         stream=True,
+ # #         temperature=temperature,
+ # #         top_p=top_p,
+ # #     ):
+ # #         token = message.choices[0].delta.content
+
+ # #         response += token
+ # #         yield response
+
+
+ # # """
+ # # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ # # """
+ # # demo = gr.ChatInterface(
+ # #     respond,
+ # #     additional_inputs=[
+ # #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+ # #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+ # #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+ # #         gr.Slider(
+ # #             minimum=0.1,
+ # #             maximum=1.0,
+ # #             value=0.95,
+ # #             step=0.05,
+ # #             label="Top-p (nucleus sampling)",
+ # #         ),
+ # #     ],
+ # # )
+
+
+ # # if __name__ == "__main__":
+ # #     demo.launch()
+
+ # import torch
  # import gradio as gr
+ # from transformers import AutoModelForCausalLM, AutoTokenizer
+ # import os
+
+ # # Define model names
+ # MODEL_1_PATH = "./adapter_model.safetensors"  # Local path inside Space
+ # MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub
+
+ # # Load the tokenizer (same for both models)
+ # TOKENIZER_NAME = "sarvamai/sarvam-1"
+ # tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
+ # def fix_checkpoint(model_path):
+ #     """Fixes the model checkpoint by adjusting mismatched weight dimensions."""
+ #     checkpoint_file = os.path.join(model_path, "pytorch_model.bin")
+ #     fixed_checkpoint_file = os.path.join(model_path, "pytorch_model_fixed.bin")
+
+ #     if not os.path.exists(checkpoint_file):
+ #         raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_file}")
+
+ #     print("Loading checkpoint for fixing...")
+ #     checkpoint = torch.load(checkpoint_file, map_location="cpu")
+
+ #     # Adjust weights (truncate the last token if mismatch)
+ #     if "base_model.model.lm_head.base_layer.weight" in checkpoint:
+ #         checkpoint["base_model.model.lm_head.base_layer.weight"] = checkpoint["base_model.model.lm_head.base_layer.weight"][:-1]
+
+ #     if "base_model.model.lm_head.lora_B.default.weight" in checkpoint:
+ #         checkpoint["base_model.model.lm_head.lora_B.default.weight"] = checkpoint["base_model.model.lm_head.lora_B.default.weight"][:-1]
+
+ #     # Save the fixed checkpoint
+ #     print("Saving fixed checkpoint...")
+ #     torch.save(checkpoint, fixed_checkpoint_file)
+
+ #     return fixed_checkpoint_file  # Return the new file path
+
+ # # Function to load a model
+ # def load_model(model_choice):
+ #     if model_choice == "Hugging face dataset":
+ #         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
+ #         model.load_adapter(MODEL_1_PATH, "safe_tensors")  # Load safetensors adapter
+ #     else:
+ #         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
+ #     model.eval()
+ #     return model
+
+ # # Load default model on startup
+ # current_model = load_model("Hugging face dataset")
+
+ # # Chatbot response function
+ # def respond(message, history, model_choice, max_tokens, temperature, top_p):
+ #     global current_model

+ #     # Switch model if user selects a different one
+ #     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
+ #        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
+ #         current_model = load_model(model_choice)

+ #     # Convert chat history to format
+ #     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
      # for val in history:
      #     if val[0]:
      #         messages.append({"role": "user", "content": val[0]})
      #     if val[1]:
      #         messages.append({"role": "assistant", "content": val[1]})
      # messages.append({"role": "user", "content": message})

+ #     # Tokenize and generate response
+ #     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
+ #     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

+ #     output_tokens = current_model.generate(
+ #         **input_tokens,
+ #         max_new_tokens=max_tokens,
          # temperature=temperature,
          # top_p=top_p,
+ #         pad_token_id=tokenizer.pad_token_id,
+ #         eos_token_id=tokenizer.eos_token_id,
+ #     )

+ #     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+ #     return response

+ # # Define Gradio Chat Interface
  # demo = gr.ChatInterface(
+ #     fn=respond,
  #     additional_inputs=[
+ #         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
+ #         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
+ #         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+ #         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
  #     ],
  # )

  # if __name__ == "__main__":
  #     demo.launch()

  import torch
  import os
+ from transformers import AutoModelForCausalLM, AutoTokenizer

+ # Define model and tokenizer paths
+ MODEL_1_PATH = "Priyanka6/fine-tuning-inference"
+ TOKENIZER_NAME = "sarvam/sarvam-1"  # Keep this unchanged if tokenizer hasn't changed

+ def trim_adapter_weights(model_path):
+     """
+     Trims the last token from the adapter's lm_head.lora_B.default.weight
+     if there is a mismatch with the base model.
+     """
+     adapter_file = os.path.join(model_path, "adapter_model.safetensors")

+     if not os.path.exists(adapter_file):
+         raise FileNotFoundError(f"Adapter file not found: {adapter_file}")
+
+     checkpoint = torch.load(adapter_file, map_location="cpu")
+
+     key_to_trim = "lm_head.lora_B.default.weight"
+
+     if key_to_trim in checkpoint:
+         original_size = checkpoint[key_to_trim].shape[0]
+         expected_size = original_size - 1  # Removing last token
+
+         print(f"Trimming {key_to_trim}: {original_size} -> {expected_size}")
+
+         checkpoint[key_to_trim] = checkpoint[key_to_trim][:-1]  # Trim the last row
+
+         # Save the modified adapter
+         trimmed_adapter_path = os.path.join(model_path, "adapter_model_trimmed.safetensors")
+         torch.save(checkpoint, trimmed_adapter_path)
+         return trimmed_adapter_path
+
+     return adapter_file
+
+ # Before loading the adapter, trim it if necessary
+ trimmed_adapter_path = trim_adapter_weights(MODEL_1_PATH)
+
+ # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
+
+ # Load the model
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_1_PATH, torch_dtype=torch.float16, device_map="auto"
  )

+ # Load the trimmed adapter
+ model.load_adapter(trimmed_adapter_path, "safe_tensors")
+
+ # Chat function
+ def chat(query):
+     inputs = tokenizer(query, return_tensors="pt").to("cuda")
+     with torch.no_grad():
+         output = model.generate(**inputs, max_new_tokens=100)
+     return tokenizer.decode(output[0], skip_special_tokens=True)
+
+ # Test the chatbot
  if __name__ == "__main__":
+     while True:
+         query = input("User: ")
+         if query.lower() in ["exit", "quit"]:
+             break
+         response = chat(query)
+         print(f"Bot: {response}")