Spaces:

rezaenayati
/

RezAi

Running on Zero

App Files Files Community

rezaenayati committed on May 27

Commit

764b0a1

verified ·

1 Parent(s): 69803e4

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -8

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 import gradio as gr
 base_model = AutoModelForCausalLM.from_pretrained(
     "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
     torch_dtype=torch.float16,
@@ -10,45 +11,55 @@ base_model = AutoModelForCausalLM.from_pretrained(
     load_in_4bit=True
 )
-# tokenizer
 tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
-# LoRA adaptors
 model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")
 def chat_with_rezAi(messages, history):
     conversation = "<|start_header_id|>system<|end_header_id|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<|eot_id|>"
     for user_msg, assistant_msg in history:
         conversation += f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|>"
         conversation += f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_msg}<|eot_id|>"
     conversation += f"<|start_header_id|>user<|end_header_id|>\n{messages}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
-    inputs = tokenizer([conversation], return_tensors="pt")
     with torch.no_grad():
         outputs = model.generate(
-            inputs,
             max_new_tokens=128,
             temperature=0.5,
             do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
-    # get response
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     new_response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
     return new_response
 demo = gr.ChatInterface(
     fn=chat_with_rezAi,
     title="💬 Chat with RezAI",
-    description="Hi! I'm RezAI. Ask me about Reza's technical background, projects, or experience!",
     examples=[
         "Tell me about your background",
-        "What programming languages do you know?",
         "Walk me through your Pizza Guys project",
         "What's your experience with machine learning?",
         "How did you get into computer science?"

 from peft import PeftModel
 import gradio as gr
+# Load base model
 base_model = AutoModelForCausalLM.from_pretrained(
     "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
     torch_dtype=torch.float16,
     load_in_4bit=True
 )
+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+# Add padding token if missing
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+# Load LoRA adapter
 model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")
 def chat_with_rezAi(messages, history):
     conversation = "<|start_header_id|>system<|end_header_id|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<|eot_id|>"
+    # Add conversation history
     for user_msg, assistant_msg in history:
         conversation += f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|>"
         conversation += f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_msg}<|eot_id|>"
+    # Add current message
     conversation += f"<|start_header_id|>user<|end_header_id|>\n{messages}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
+    # Tokenize with proper handling
+    inputs = tokenizer(conversation, return_tensors="pt", truncate=True, max_length=2048)
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Move to GPU
     with torch.no_grad():
         outputs = model.generate(
+            **inputs,  # Unpack inputs properly
             max_new_tokens=128,
             temperature=0.5,
             do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id
         )
+    # Decode response
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     new_response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
     return new_response
+# Create Gradio interface
 demo = gr.ChatInterface(
     fn=chat_with_rezAi,
     title="💬 Chat with RezAI",
+    description="Hi! I'm RezAI, Reza's AI twin. Ask me about his technical background, projects, or experience!",
     examples=[
         "Tell me about your background",
+        "What programming languages do you know?",
         "Walk me through your Pizza Guys project",
         "What's your experience with machine learning?",
         "How did you get into computer science?"