Update app.py
app.py
CHANGED
@@ -75,7 +75,7 @@ class BasicModel:
     model_id,
     torch_dtype=torch.float16,
     device_map="auto", ## auto-distributes to GPU
-    attn_implementation="flash_attention_2",
+    # attn_implementation="flash_attention_2", ## Not able to install 'flash-attn' here for now
     token=hf_token,
     trust_remote_code=True, ## <- Use the custom code that isn't part of the base transformers library yet
     quantization_config=quantization_config ## <- Load 4-bit quantization because vRAM is not big enough
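For context, here is a minimal sketch of how these kwargs fit together in a `from_pretrained` call after this change. Only the keyword arguments shown in the diff come from app.py; `model_id`, `hf_token`, and the exact `BitsAndBytesConfig` settings are assumptions, since the surrounding code is not part of this commit.

```python
# Sketch of the load call this hunk sits inside; values marked as
# assumptions are placeholders, not taken from the actual app.py.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_id = "some-org/some-model"  # assumption: real model id not shown in the diff
hf_token = "hf_..."               # assumption: likely read from an env var or Space secret

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit quantization because vRAM is not big enough
    bnb_4bit_compute_dtype=torch.float16,  # assumption: chosen to match torch_dtype below
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",  ## auto-distributes to GPU
    # attn_implementation="flash_attention_2",  ## Not able to install 'flash-attn' here for now
    token=hf_token,
    trust_remote_code=True,  ## <- Use the custom code that isn't part of the base transformers library yet
    quantization_config=quantization_config,  ## <- Load 4-bit quantization because vRAM is not big enough
)
```

With `attn_implementation` commented out, transformers falls back to its default attention backend, so the model still loads without the `flash-attn` package installed.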