rezaenayati committed on
Commit
764b0a1
·
verified ·
1 Parent(s): 69803e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -3,6 +3,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  from peft import PeftModel
4
  import gradio as gr
5
 
 
6
  base_model = AutoModelForCausalLM.from_pretrained(
7
  "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
8
  torch_dtype=torch.float16,
@@ -10,45 +11,55 @@ base_model = AutoModelForCausalLM.from_pretrained(
10
  load_in_4bit=True
11
  )
12
 
13
- # tokenizer
14
  tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
15
 
16
- # LoRA adaptors
 
 
 
 
17
  model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")
18
 
19
  def chat_with_rezAi(messages, history):
20
  conversation = "<|start_header_id|>system<|end_header_id|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<|eot_id|>"
21
 
 
22
  for user_msg, assistant_msg in history:
23
  conversation += f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|>"
24
  conversation += f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_msg}<|eot_id|>"
25
 
 
26
  conversation += f"<|start_header_id|>user<|end_header_id|>\n{messages}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
27
 
28
- inputs = tokenizer([conversation], return_tensors="pt")
 
 
29
 
30
  with torch.no_grad():
31
  outputs = model.generate(
32
- inputs,
33
  max_new_tokens=128,
34
  temperature=0.5,
35
  do_sample=True,
36
- pad_token_id=tokenizer.eos_token_id
 
37
  )
38
 
39
- # get response
40
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
41
  new_response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
42
 
43
  return new_response
44
 
 
45
  demo = gr.ChatInterface(
46
  fn=chat_with_rezAi,
47
  title="💬 Chat with RezAI",
48
- description="Hi! I'm RezAI. Ask me about Reza's technical background, projects, or experience!",
49
  examples=[
50
  "Tell me about your background",
51
- "What programming languages do you know?",
52
  "Walk me through your Pizza Guys project",
53
  "What's your experience with machine learning?",
54
  "How did you get into computer science?"
 
3
  from peft import PeftModel
4
  import gradio as gr
5
 
6
+ # Load base model
7
  base_model = AutoModelForCausalLM.from_pretrained(
8
  "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
9
  torch_dtype=torch.float16,
 
11
  load_in_4bit=True
12
  )
13
 
14
# Tokenizer for the same base checkpoint the model was loaded from.
tokenizer = AutoTokenizer.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
)

# Some Llama checkpoints ship without a dedicated pad token; fall back to
# EOS so padded generation does not fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(base_model, "rezaenayati/RezAi-Model")
23
 
24
def chat_with_rezAi(messages, history):
    """Gradio chat callback: build a Llama-3-format prompt from the running
    conversation, generate a reply with the LoRA-adapted model, and return
    only the newly generated assistant text.

    Args:
        messages: The user's latest message (a string from gr.ChatInterface).
        history: Prior turns as (user_msg, assistant_msg) pairs.

    Returns:
        The assistant's reply as a plain string.
    """
    # System persona prompt in Llama 3 header format.
    conversation = "<|start_header_id|>system<|end_header_id|>\nYou are Reza Enayati, a Computer Science student and entrepreneur from Los Angeles, who is eager to work as a software engineer or machine learning engineer. Answer these questions as if you are in an interview.<|eot_id|>"

    # Replay prior turns so the model sees the full conversation.
    for user_msg, assistant_msg in history:
        conversation += f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|>"
        conversation += f"<|start_header_id|>assistant<|end_header_id|>\n{assistant_msg}<|eot_id|>"

    # Current user message, then an open assistant header for the model to complete.
    conversation += f"<|start_header_id|>user<|end_header_id|>\n{messages}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"

    # BUG FIX: the HF tokenizer kwarg is `truncation`, not `truncate`.
    # The misspelled kwarg was silently ignored, so long conversations were
    # never truncated to the 2048-token limit.
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}  # move tensors to the model's device

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.5,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # BUG FIX: decode only the generated suffix. The old code decoded the
    # whole sequence with skip_special_tokens=True, which strips the
    # "<|start_header_id|>" markers it then split on — the split found no
    # delimiter, so the entire prompt leaked into the returned reply.
    prompt_len = inputs["input_ids"].shape[1]
    new_response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    return new_response
54
 
55
+ # Create Gradio interface
56
  demo = gr.ChatInterface(
57
  fn=chat_with_rezAi,
58
  title="💬 Chat with RezAI",
59
+ description="Hi! I'm RezAI, Reza's AI twin. Ask me about his technical background, projects, or experience!",
60
  examples=[
61
  "Tell me about your background",
62
+ "What programming languages do you know?",
63
  "Walk me through your Pizza Guys project",
64
  "What's your experience with machine learning?",
65
  "How did you get into computer science?"