Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from transformers import (
     AutoModelForCausalLM,
     TextIteratorStreamer,
     pipeline,
+    AutoConfig,
 )
 from threading import Thread
 
@@ -21,13 +22,19 @@ from threading import Thread
 model_name_or_path = "TheBloke/phi-2-GPTQ"
 # To use a different branch, change revision
 # For example: revision="gptq-4bit-32g-actorder_True"
+
+config = AutoConfig.from_pretrained(model_name_or_path)
+config.quantization_config["disable_exllama"] = True
+
 model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                              device_map="cpu",
                                              trust_remote_code=True,
-                                             revision="main")
+                                             revision="main",
+                                             config=config)
 
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
 
+
 # Text generation pipeline
 phi2 = pipeline(
     "text-generation",