disable flash_attention
app_dialogue.py +6 -6
app_dialogue.py CHANGED
@@ -2,11 +2,11 @@ import os
 import subprocess
 
 # Install flash attention
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
+# subprocess.run(
+#     "pip install flash-attn --no-build-isolation",
+#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#     shell=True,
+# )
 
 
 import copy
@@ -31,7 +31,7 @@ MODELS = {
     "xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoModelForVision2Seq.from_pretrained(
         "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5",
         torch_dtype=torch.bfloat16,
-        _attn_implementation="flash_attention_2",
+        # _attn_implementation="flash_attention_2",
         trust_remote_code=True
     ).to(DEVICE),
 }
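For reference, a minimal sketch of the load path after this change, assuming the same model id and a DEVICE constant as in the surrounding file: with flash-attn no longer installed, from_pretrained falls back to transformers' default attention implementation. The try/except guard is a hypothetical alternative, not part of this commit; it requests flash_attention_2 only when the flash_attn package is actually importable.

import torch
from transformers import AutoModelForVision2Seq

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical guard (not in this commit): only request flash-attn
# when the package is importable, otherwise use the default attention.
try:
    import flash_attn  # noqa: F401
    extra_kwargs = {"_attn_implementation": "flash_attention_2"}
except ImportError:
    extra_kwargs = {}

model = AutoModelForVision2Seq.from_pretrained(
    "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,  # xgen-mm ships custom modeling code on the Hub
    **extra_kwargs,
).to(DEVICE)

Note that _attn_implementation is the underscore-prefixed kwarg this file already uses; recent transformers releases expose the same switch publicly as attn_implementation.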