Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import json
|
|
3 |
import gradio as gr
|
4 |
import torch.nn.functional as F
|
5 |
from transformers import AutoTokenizer, AutoModel
|
6 |
-
from peft import PeftModel
|
7 |
import time
|
8 |
import re
|
9 |
|
@@ -12,9 +11,10 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
12 |
print(f"Using device: {device}")
|
13 |
|
14 |
# Load base model and tokenizer
|
15 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
16 |
-
model = AutoModel.from_pretrained(
|
17 |
-
|
|
|
18 |
|
19 |
# Constants
|
20 |
MASK_TOKEN = "[MASK]"
|
@@ -717,7 +717,5 @@ If the user request does not necessitate a function call, simply respond to the
|
|
717 |
|
718 |
return demo
|
719 |
|
720 |
-
|
721 |
-
|
722 |
-
demo = create_chatbot_demo()
|
723 |
-
demo.queue().launch(share=True)
|
|
|
3 |
import gradio as gr
|
4 |
import torch.nn.functional as F
|
5 |
from transformers import AutoTokenizer, AutoModel
|
|
|
6 |
import time
|
7 |
import re
|
8 |
|
|
|
11 |
print(f"Using device: {device}")
|
12 |
|
13 |
# Load base model and tokenizer
|
14 |
+
tokenizer = AutoTokenizer.from_pretrained("Proximile/LLaDA-8B-Tools", trust_remote_code=True)
|
15 |
+
model = AutoModel.from_pretrained("Proximile/LLaDA-8B-Tools", trust_remote_code=True, torch_dtype=torch.bfloat16, load_in_4bit=True)
|
16 |
+
|
17 |
+
model.eval()
|
18 |
|
19 |
# Constants
|
20 |
MASK_TOKEN = "[MASK]"
|
|
|
717 |
|
718 |
return demo
|
719 |
|
720 |
+
demo = create_chatbot_demo()
|
721 |
+
demo.queue().launch(share=True)
|
|
|
|