Update app.py
Browse files
app.py
CHANGED
|
@@ -9,14 +9,24 @@ from transformers import (
|
|
| 9 |
)
|
| 10 |
from threading import Thread
|
| 11 |
|
| 12 |
-
|
| 13 |
-
checkpoint = "microsoft/phi-2"
|
| 14 |
|
| 15 |
-
|
| 16 |
-
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
| 17 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 18 |
-
checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
| 19 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Text generation pipeline
|
| 22 |
phi2 = pipeline(
|
|
@@ -80,9 +90,9 @@ with gr.Blocks() as demo:
|
|
| 80 |
gr.Markdown(
|
| 81 |
"""
|
| 82 |
# Phi-2 Chatbot Demo
|
| 83 |
-
This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
| 84 |
|
| 85 |
-
In order to reduce the response time on this hardware, `max_new_tokens`
|
| 86 |
"""
|
| 87 |
)
|
| 88 |
|
|
|
|
| 9 |
)
|
| 10 |
from threading import Thread
|
| 11 |
|
| 12 |
+
## The Hugging Face model ID for Microsoft's phi-2 model
|
| 13 |
+
#checkpoint = "microsoft/phi-2"
|
| 14 |
|
| 15 |
+
## Download and load model and tokenizer
|
| 16 |
+
#tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
|
| 17 |
+
#model = AutoModelForCausalLM.from_pretrained(
|
| 18 |
+
# checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True
|
| 19 |
+
#)
|
| 20 |
+
|
| 21 |
+
model_name_or_path = "TheBloke/phi-2-GPTQ"
|
| 22 |
+
# To use a different branch, change revision
|
| 23 |
+
# For example: revision="gptq-4bit-32g-actorder_True"
|
| 24 |
+
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
|
| 25 |
+
device_map="auto",
|
| 26 |
+
trust_remote_code=True,
|
| 27 |
+
revision="main")
|
| 28 |
+
|
| 29 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
| 30 |
|
| 31 |
# Text generation pipeline
|
| 32 |
phi2 = pipeline(
|
|
|
|
| 90 |
gr.Markdown(
|
| 91 |
"""
|
| 92 |
# Phi-2 Chatbot Demo
|
| 93 |
+
This chatbot was created using TheBloke/phi-2-GPTQ, a GPTQ-quantized version of Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
|
| 94 |
|
| 95 |
+
In order to reduce the response time on this hardware, set `max_new_tokens` to a lower number in the text generation pipeline.
|
| 96 |
"""
|
| 97 |
)
|
| 98 |
|