Spaces:

Priyanka6
/

fine-tuning-inference

Runtime error

App Files Files Community

Priyanka6 committed on Feb 25

Commit

b623416

1 Parent(s): bf2cc50

Update space

Browse files

Files changed (1) hide show

app.py +82 -131

app.py CHANGED Viewed

@@ -63,122 +63,29 @@
 # # if __name__ == "__main__":
 # #     demo.launch()
-# import torch
-# import gradio as gr
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# import os
-# # Define model names
-# MODEL_1_PATH = "./adapter_model.safetensors"  # Local path inside Space
-# MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub
-# # Load the tokenizer (same for both models)
-# TOKENIZER_NAME = "sarvamai/sarvam-1"
-# tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-# def fix_checkpoint(model_path):
-#     """Fixes the model checkpoint by adjusting mismatched weight dimensions."""
-#     checkpoint_file = os.path.join(model_path, "pytorch_model.bin")
-#     fixed_checkpoint_file = os.path.join(model_path, "pytorch_model_fixed.bin")
-#     if not os.path.exists(checkpoint_file):
-#         raise FileNotFoundError(f"Checkpoint file not found at: {checkpoint_file}")
-#     print("Loading checkpoint for fixing...")
-#     checkpoint = torch.load(checkpoint_file, map_location="cpu")
-#     # Adjust weights (truncate the last token if mismatch)
-#     if "base_model.model.lm_head.base_layer.weight" in checkpoint:
-#         checkpoint["base_model.model.lm_head.base_layer.weight"] = checkpoint["base_model.model.lm_head.base_layer.weight"][:-1]
-#     if "base_model.model.lm_head.lora_B.default.weight" in checkpoint:
-#         checkpoint["base_model.model.lm_head.lora_B.default.weight"] = checkpoint["base_model.model.lm_head.lora_B.default.weight"][:-1]
-#     # Save the fixed checkpoint
-#     print("Saving fixed checkpoint...")
-#     torch.save(checkpoint, fixed_checkpoint_file)
-#     return fixed_checkpoint_file  # Return the new file path
-# # Function to load a model
-# def load_model(model_choice):
-#     if model_choice == "Hugging face dataset":
-#         model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
-#         model.load_adapter(MODEL_1_PATH, "safe_tensors")  # Load safetensors adapter
-#     else:
-#         model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
-#     model.eval()
-#     return model
-# # Load default model on startup
-# current_model = load_model("Hugging face dataset")
-# # Chatbot response function
-# def respond(message, history, model_choice, max_tokens, temperature, top_p):
-#     global current_model
-#     # Switch model if user selects a different one
-#     if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
-#        (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
-#         current_model = load_model(model_choice)
-#     # Convert chat history to format
-#     messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
-#     for val in history:
-#         if val[0]:
-#             messages.append({"role": "user", "content": val[0]})
-#         if val[1]:
-#             messages.append({"role": "assistant", "content": val[1]})
-#     messages.append({"role": "user", "content": message})
-#     # Tokenize and generate response
-#     inputs = tokenizer.apply_chat_template(messages, tokenize=False)
-#     input_tokens = tokenizer(inputs, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
-#     output_tokens = current_model.generate(
-#         **input_tokens,
-#         max_new_tokens=max_tokens,
-#         temperature=temperature,
-#         top_p=top_p,
-#         pad_token_id=tokenizer.pad_token_id,
-#         eos_token_id=tokenizer.eos_token_id,
-#     )
-#     response = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-#     return response
-# # Define Gradio Chat Interface
-# demo = gr.ChatInterface(
-#     fn=respond,
-#     additional_inputs=[
-#         gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Fine-Tuned Model", label="Select Model"),
-#         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
-#         gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-#     ],
-# )
-# if __name__ == "__main__":
-#     demo.launch()
 import torch
-import os
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# Define model and tokenizer paths
 MODEL_1_PATH = "Priyanka6/fine-tuning-inference"
-TOKENIZER_NAME = "sarvam/sarvam-1"  # Keep this unchanged if tokenizer hasn't changed
 def trim_adapter_weights(model_path):
     """
     Trims the last token from the adapter's lm_head.lora_B.default.weight
     if there is a mismatch with the base model.
     """
-    adapter_file = os.path.join(model_path, "adapter_model.safetensors")
-    if not os.path.exists(adapter_file):
-        raise FileNotFoundError(f"Adapter file not found: {adapter_file}")
-    checkpoint = torch.load(adapter_file, map_location="cpu")
     key_to_trim = "lm_head.lora_B.default.weight"
@@ -195,36 +102,80 @@ def trim_adapter_weights(model_path):
         torch.save(checkpoint, trimmed_adapter_path)
         return trimmed_adapter_path
-    return adapter_file
-# Before loading the adapter, trim it if necessary
-trimmed_adapter_path = trim_adapter_weights(MODEL_1_PATH)
-# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-# Load the model
-# model = AutoModelForCausalLM.from_pretrained(
-#     MODEL_1_PATH, torch_dtype=torch.float16, device_map="auto"
-# )
-model = AutoModelForCausalLM.from_pretrained("Priyanka6/fine-tuning-inference", use_auth_token=True)
-# Load the trimmed adapter
-model.load_adapter(trimmed_adapter_path, "safe_tensors")
-# Chat function
-def chat(query):
-    inputs = tokenizer(query, return_tensors="pt").to("cuda")
-    with torch.no_grad():
-        output = model.generate(**inputs, max_new_tokens=100)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-# Test the chatbot
-if __name__ == "__main__":
-    while True:
-        query = input("User: ")
-        if query.lower() in ["exit", "quit"]:
-            break
-        response = chat(query)
-        print(f"Bot: {response}")

 # # if __name__ == "__main__":
 # #     demo.launch()
 import torch
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+# Define model names
+MODEL_1_PATH = "./adapter_model.safetensors"  # Local path inside Space
+###
 MODEL_1_PATH = "Priyanka6/fine-tuning-inference"
+###
+MODEL_2_NAME = "sarvamai/sarvam-1"  # The base model on Hugging Face Hub
+# MODEL_3_NAME =
 def trim_adapter_weights(model_path):
     """
     Trims the last token from the adapter's lm_head.lora_B.default.weight
     if there is a mismatch with the base model.
     """
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Adapter file not found: {model_path}")
+    checkpoint = torch.load(model_path, map_location="cpu")
     key_to_trim = "lm_head.lora_B.default.weight"
         torch.save(checkpoint, trimmed_adapter_path)
         return trimmed_adapter_path
+    return model_path
+# trimmed_adapter_path = trim_adapter_weights(MODEL_1_PATH)
+# Load the tokenizer (same for both models)
+TOKENIZER_NAME = "sarvamai/sarvam-1"
 tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
+# Function to load a model
+def load_model(model_choice, trimmed_adapter_path=None):
+    """Load the causal language model selected by ``model_choice`` and put it in eval mode.
+
+    Args:
+        model_choice: Label chosen in the UI. ``"Hugging face dataset"`` loads the
+            model found in the current directory (``"./"``) and attaches an adapter
+            to it; any other value loads ``MODEL_2_NAME``.
+        trimmed_adapter_path: Adapter location handed to ``load_adapter``. Optional:
+            the callers in this file pass only ``model_choice``, which previously
+            raised ``TypeError``. When omitted, ``MODEL_1_PATH`` is used, as in the
+            earlier revision of this function.
+
+    Returns:
+        The loaded model, after ``model.eval()`` has been called on it.
+    """
+    if model_choice == "Hugging face dataset":
+        adapter_path = MODEL_1_PATH if trimmed_adapter_path is None else trimmed_adapter_path
+        model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.float16, device_map="auto")
+        # NOTE(review): the second argument is presumably the adapter *name*, not a
+        # file format -- kept unchanged; confirm against the load_adapter docs.
+        model.load_adapter(adapter_path, "safe_tensors")
+    else:
+        model = AutoModelForCausalLM.from_pretrained(MODEL_2_NAME)
+    model.eval()
+    return model
+# Load default model on startup
+current_model = load_model("Hugging face dataset")
+# Chatbot response function
+def respond(message, history, model_choice, max_tokens, temperature, top_p):
+    """Generate the assistant's reply for a Gradio chat turn.
+
+    Args:
+        message: The user's newest message.
+        history: Earlier turns; each item is indexed as ``[user_text, assistant_text]``.
+        model_choice: Label from the model dropdown; may trigger a model reload.
+        max_tokens: Upper bound on newly generated tokens.
+        temperature: Sampling temperature forwarded to ``generate``.
+        top_p: Nucleus-sampling threshold forwarded to ``generate``.
+
+    Returns:
+        The decoded text of the newly generated tokens only (the prompt is stripped).
+    """
+    global current_model
+    # Switch model if the user selected a different one.
+    # NOTE(review): the fine-tuned model is loaded from "./", so its
+    # config.name_or_path probably never equals MODEL_1_PATH and this may reload
+    # on every message -- confirm and consider tracking the loaded choice instead.
+    if (model_choice == "Hugging face dataset" and current_model is not None and current_model.config.name_or_path != MODEL_1_PATH) or \
+       (model_choice == "Proprietary dataset1" and current_model is not None and current_model.config.name_or_path != MODEL_2_NAME):
+        # Pass the adapter location explicitly: load_model takes two arguments.
+        current_model = load_model(model_choice, MODEL_1_PATH)
+    # Convert the chat history into role/content messages.
+    messages = [{"role": "system", "content": "You are a friendly AI assistant."}]
+    for user_text, assistant_text in ((turn[0], turn[1]) for turn in history):
+        if user_text:
+            messages.append({"role": "user", "content": user_text})
+        if assistant_text:
+            messages.append({"role": "assistant", "content": assistant_text})
+    messages.append({"role": "user", "content": message})
+    # Render the prompt and ask the template to open the assistant turn.
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # Put the inputs on the model's own device rather than assuming "cuda".
+    input_tokens = tokenizer(prompt, return_tensors="pt").to(current_model.device)
+    output_tokens = current_model.generate(
+        **input_tokens,
+        max_new_tokens=int(max_tokens),
+        # temperature/top_p only take effect when sampling is enabled.
+        do_sample=True,
+        temperature=temperature,
+        top_p=top_p,
+        pad_token_id=tokenizer.pad_token_id,
+        eos_token_id=tokenizer.eos_token_id,
+    )
+    # generate() returns prompt + continuation; keep only the continuation so the
+    # chat window does not echo the whole conversation back.
+    prompt_length = input_tokens["input_ids"].shape[-1]
+    response = tokenizer.decode(output_tokens[0][prompt_length:], skip_special_tokens=True)
+    return response
+# Define Gradio Chat Interface
+# Chat UI: `respond` receives the message, the history, and the four extra inputs
+# below in the order listed (model choice, max tokens, temperature, top-p).
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        # The default must be one of `choices`; use the model that is loaded at startup.
+        gr.Dropdown(choices=["Hugging face dataset", "Proprietary dataset1"], value="Hugging face dataset", label="Select Model"),
+        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
+    ],
+)
+if __name__ == "__main__":
+    demo.launch()
+# # Test the chatbot
+# if __name__ == "__main__":
+#     while True:
+#         query = input("User: ")
+#         if query.lower() in ["exit", "quit"]:
+#             break
+#         response = chat(query)
+#         print(f"Bot: {response}")