Update README.md
README.md (changed)
@@ -51,25 +51,6 @@ quant_config_4 = BitsAndBytesConfig(
     llm_int8_enable_fp32_cpu_offload=True,
 )
 
-NUM_TRANS_LAYERS = 94
-
-def create_device_map():
-    device_map = {
-        'model.embed_tokens': 0,
-        'model.norm': 0,
-        'lm_head': 0
-    }
-    for start, end, gpu_id in [(0, 4, 0), (4, 22, 1), (22, 40, 2), (40, 58, 3), (58, 76, 4), (76, 94, 5)]:
-        for i in range(start, end):
-            device_map[f'model.layers.{i}'] = gpu_id
-
-    #for i in range(76, NUM_TRANS_LAYERS):
-    #    device_map[f'model.layers.{i}'] = "cpu"
-
-    return device_map
-
-#device_map = create_device_map()
-
 model = AutoModelForCausalLM.from_pretrained(
     NEW_MODEL_ID,
     device_map="balanced",
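In effect, the change drops the hand-rolled `create_device_map()` helper, which pinned fixed ranges of the 94 transformer layers to specific GPUs, and instead relies on `device_map="balanced"` so Accelerate spreads the layers evenly across the visible devices. Below is a minimal sketch of the loading pattern the README keeps after this change; the model id and the 4-bit settings are placeholders, only `llm_int8_enable_fp32_cpu_offload=True` and `device_map="balanced"` come from the diff.

```python
# Sketch of the retained loading pattern (assumptions marked inline).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

NEW_MODEL_ID = "org/some-94-layer-model"  # assumption: placeholder model id

quant_config_4 = BitsAndBytesConfig(
    load_in_4bit=True,                      # assumption, suggested by the name quant_config_4
    bnb_4bit_compute_dtype=torch.float16,   # assumption
    llm_int8_enable_fp32_cpu_offload=True,  # from the diff: allow fp32 CPU offload of leftover modules
)

model = AutoModelForCausalLM.from_pretrained(
    NEW_MODEL_ID,
    device_map="balanced",                  # let Accelerate split layers evenly across GPUs
    quantization_config=quant_config_4,
)
```

If manual placement is still wanted, the removed helper could be passed back in via `device_map=create_device_map()` (the diff shows that call only as a commented-out line).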