Priyanka6 committed
Commit b623416 · 1 Parent(s): bf2cc50

Update space

Files changed (1)
  1. app.py +82 -131
app.py CHANGED
@@ -63,122 +63,29 @@
  # # if __name__ == "__main__":
  # #     demo.launch()

- # import torch
- # import gradio as gr
- # from transformers import AutoModelForCausalLM, AutoTokenizer
- # import os
-
- # # Define model names
- # MODEL_1_PATH = "./adapter_model.safetensors" # Local path inside Space
- # MODEL_2_NAME = "sarvamai/sarvam-1" # The base model on Hugging Face Hub
-
- # # Load the tokenizer (same for both models)
- # TOKENIZER_NAME = "sarvamai/sarvam-1"
- # tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
- # def fix_checkpoint(model_path):
- #     """Fixes the model checkpoint by adjusting mismatched weight dimensions."""
- #     checkpoint_file = os.path.join(model_path, "pytorch_model.bin")
- #     fixed_checkpoint_file = os.path.join(model_path, "pytorch_model_fixed.bin")
-
- #     if not os.path.exists(checkpoint_file):
- #         raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_file}")
-
- #     print("Loading checkpoint for fixing...")
- #     checkpoint = torch.load(checkpoint_file, map_location="cpu")
-
- #     # Adjust weights (truncate the last token if mismatch)
- #     if "base_model.model.lm_head.base_layer.weight" in checkpoint:
- #         checkpoint["base_model.model.lm_head.base_layer.weight"] = checkpoint["base_model.model.lm_head.base_layer.weight"][:-1]
-
- #     if "base_model.model.lm_head.lora_B.default.weight" in checkpoint:
- #         checkpoint["base_model.model.lm_head.lora_B.default.weight"] = checkpoint["base_model.model.lm_head.lora_B.default.weight"][:-1]
-
- #     # Save the fixed checkpoint
- #     print("Saving fixed checkpoint...")
- #     torch.save(checkpoint, fixed_checkpoint_file)
-
- #     return fixed_checkpoint_file # Return the new file path
-
- # # Function to load a model
- # def load_model(model_choice):
- #     if model_choice == "Hugging face dataset":
- #         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
- #         model.load_adapter(MODEL_1_PATH, "safe_tensors") # Load safetensors adapter
- #     else:
- #         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
- #     model.eval()
- #     return model
-
- # # Load default model on startup
- # current_model = load_model("Hugging face dataset")
-
- # # Chatbot response function
- # def respond(message, history, model_choice, max_tokens, temperature, top_p):
- #     global current_model
-
- #     # Switch model if user selects a different one
- #     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
- #        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
- #         current_model = load_model(model_choice)
-
- #     # Convert chat history to format
- #     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
- #     for val in history:
- #         if val[0]:
- #             messages.append({"role": "user", "content": val[0]})
- #         if val[1]:
- #             messages.append({"role": "assistant", "content": val[1]})
- #     messages.append({"role": "user", "content": message})
-
- #     # Tokenize and generate response
- #     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
- #     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-
- #     output_tokens = current_model.generate(
- #         **input_tokens,
- #         max_new_tokens=max_tokens,
- #         temperature=temperature,
- #         top_p=top_p,
- #         pad_token_id=tokenizer.pad_token_id,
- #         eos_token_id=tokenizer.eos_token_id,
- #     )
-
- #     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
- #     return response
-
- # # Define Gradio Chat Interface
- # demo = gr.ChatInterface(
- #     fn=respond,
- #     additional_inputs=[
- #         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
- #         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
- #         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
- #         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
- #     ],
- # )
-
- # if __name__ == "__main__":
- #     demo.launch()
-
  import torch
- import os
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer
+ import os

- # Define model and tokenizer paths
+ # Define model names
+ MODEL_1_PATH = "./adapter_model.safetensors" # Local path inside Space
+ ###
  MODEL_1_PATH = "Priyanka6/fine-tuning-inference"
- TOKENIZER_NAME = "sarvam/sarvam-1" # Keep this unchanged if tokenizer hasn't changed
+ ###
+ MODEL_2_NAME = "sarvamai/sarvam-1" # The base model on Hugging Face Hub
+ # MODEL_3_NAME =

  def trim_adapter_weights(model_path):
      """
      Trims the last token from the adapter's lm_head.lora_B.default.weight
      if there is a mismatch with the base model.
      """
-     adapter_file = os.path.join(model_path, "adapter_model.safetensors")

-     if not os.path.exists(adapter_file):
-         raise FileNotFoundError(f"Adapter file not found: {adapter_file}")
+     if not os.path.exists(model_path):
+         raise FileNotFoundError(f"Adapter file not found: {model_path}")

-     checkpoint = torch.load(adapter_file, map_location="cpu")
+     checkpoint = torch.load(model_path, map_location="cpu")

      key_to_trim = "lm_head.lora_B.default.weight"

@@ -195,36 +102,80 @@ def trim_adapter_weights(model_path):
          torch.save(checkpoint, trimmed_adapter_path)
          return trimmed_adapter_path

-     return adapter_file
+     return model_path
+ # trimmed_adapter_path = trim_adapter_weights(MODEL_1_PATH)

- # Before loading the adapter, trim it if necessary
- trimmed_adapter_path = trim_adapter_weights(MODEL_1_PATH)
-
- # Load the tokenizer
+ # Load the tokenizer (same for both models)
+ TOKENIZER_NAME = "sarvamai/sarvam-1"
  tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

- # Load the model
- # model = AutoModelForCausalLM.from_pretrained(
- #     MODEL_1_PATH, torch_dtype=torch.float16, device_map="auto"
- # )
-
- model = AutoModelForCausalLM.from_pretrained("Priyanka6/fine-tuning-inference", use_auth_token=True)
+ # Function to load a model
+ def load_model(model_choice,trimmed_adapter_path):
+     if model_choice == "Hugging face dataset":
+         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
+         model.load_adapter(trimmed_adapter_path, "safe_tensors") # Load safetensors adapter
+     else:
+         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
+     model.eval()
+     return model
+
+ # Load default model on startup
+ current_model = load_model("Hugging face dataset")
+
+ # Chatbot response function
+ def respond(message, history, model_choice, max_tokens, temperature, top_p):
+     global current_model
+
+     # Switch model if user selects a different one
+     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
+        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
+         current_model = load_model(model_choice)
+
+     # Convert chat history to format
+     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+     messages.append({"role": "user", "content": message})
+
+     # Tokenize and generate response
+     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
+     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+
+     output_tokens = current_model.generate(
+         **input_tokens,
+         max_new_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         pad_token_id=tokenizer.pad_token_id,
+         eos_token_id=tokenizer.eos_token_id,
+     )
+
+     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+     return response
+
+ # Define Gradio Chat Interface
+ demo = gr.ChatInterface(
+     fn=respond,
+     additional_inputs=[
+         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
+         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
+         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+     ],
+ )

- # Load the trimmed adapter
- model.load_adapter(trimmed_adapter_path, "safe_tensors")
+ if __name__ == "__main__":
+     demo.launch()

- # Chat function
- def chat(query):
-     inputs = tokenizer(query, return_tensors="pt").to("cuda")
-     with torch.no_grad():
-         output = model.generate(**inputs, max_new_tokens=100)
-     return tokenizer.decode(output[0], skip_special_tokens=True)

- # Test the chatbot
- if __name__ == "__main__":
-     while True:
-         query = input("User: ")
-         if query.lower() in ["exit", "quit"]:
-             break
-         response = chat(query)
-         print(f"Bot: {response}")
+ # # Test the chatbot
+ # if __name__ == "__main__":
+ #     while True:
+ #         query = input("User: ")
+ #         if query.lower() in ["exit", "quit"]:
+ #             break
+ #         response = chat(query)
+ #         print(f"Bot: {response}")
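Note on the trim step in this diff: trim_adapter_weights() reads the adapter checkpoint with torch.load(), but if the adapter is stored as adapter_model.safetensors (as in the earlier revision of app.py), torch.load() cannot parse that format, since it only reads pickle-based checkpoints. Below is a minimal sketch of the same trim done with the safetensors API; the file names and the key-matching rule are illustrative assumptions, not code from this commit.

# Sketch (not part of the commit): trim a LoRA adapter stored as safetensors.
# File names and the key-matching rule below are assumptions for illustration.
from safetensors.torch import load_file, save_file

adapter_file = "adapter_model.safetensors"   # assumed local adapter file
tensors = load_file(adapter_file)            # maps tensor name -> torch.Tensor

# Drop the last vocab row of any lm_head lora_B matrix, mirroring the intent
# of trim_adapter_weights() in app.py.
for name, weight in list(tensors.items()):
    if "lm_head" in name and "lora_B" in name:
        tensors[name] = weight[:-1].contiguous()

save_file(tensors, "adapter_model_trimmed.safetensors")  # assumed output name

Once a corrected adapter_model.safetensors sits next to its adapter_config.json, it can be attached to the sarvamai/sarvam-1 base model through the PEFT integration, for example with the same model.load_adapter(...) call that load_model() in this commit already uses.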