ProximileAdmin committed on
Commit
4a09143
·
verified ·
1 Parent(s): d2e905f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -3,7 +3,6 @@ import json
3
  import gradio as gr
4
  import torch.nn.functional as F
5
  from transformers import AutoTokenizer, AutoModel
6
- from peft import PeftModel
7
  import time
8
  import re
9
 
@@ -12,9 +11,10 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
12
  print(f"Using device: {device}")
13
 
14
  # Load base model and tokenizer
15
- tokenizer = AutoTokenizer.from_pretrained('GSAI-ML/LLaDA-8B-Instruct', trust_remote_code=True)
16
- model = AutoModel.from_pretrained('Proximile/LLaDA-8B-Tools', trust_remote_code=True,
17
- torch_dtype=torch.bfloat16, load_in_8bit=True)
 
18
 
19
  # Constants
20
  MASK_TOKEN = "[MASK]"
@@ -717,7 +717,5 @@ If the user request does not necessitate a function call, simply respond to the
717
 
718
  return demo
719
 
720
- # Launch the demo
721
- if __name__ == "__main__":
722
- demo = create_chatbot_demo()
723
- demo.queue().launch(share=True)
 
3
  import gradio as gr
4
  import torch.nn.functional as F
5
  from transformers import AutoTokenizer, AutoModel
 
6
  import time
7
  import re
8
 
 
11
  print(f"Using device: {device}")
12
 
13
  # Load base model and tokenizer
14
+ tokenizer = AutoTokenizer.from_pretrained("Proximile/LLaDA-8B-Tools", trust_remote_code=True)
15
+ model = AutoModel.from_pretrained("Proximile/LLaDA-8B-Tools", trust_remote_code=True, torch_dtype=torch.bfloat16, load_in_4bit=True)
16
+
17
+ model.eval()
18
 
19
  # Constants
20
  MASK_TOKEN = "[MASK]"
 
717
 
718
  return demo
719
 
720
+ demo = create_chatbot_demo()
721
+ demo.queue().launch(share=True)