keynes42 committed
Commit aa13977 · verified · 1 parent: 8340c60

Update app.py

Files changed (1)
  1. app.py +1 -1
app.py CHANGED
@@ -75,7 +75,7 @@ class BasicModel:
     model_id,
     torch_dtype=torch.float16,
     device_map="auto", ## auto-distributes to GPU
-    attn_implementation="flash_attention_2",
+    # attn_implementation="flash_attention_2", ## Not able to install 'flash-attn' here for now
     token=hf_token,
     trust_remote_code=True, ## <- Use the custom code that isn't part of the base transformers library yet
     quantization_config=quantization_config ## <- Load 4-bit quantization because vRAM is not big enough
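
For reference, a minimal sketch of how the full from_pretrained call could guard this at runtime instead of commenting the argument out. The model_id value, hf_token, and the BitsAndBytesConfig setup below are placeholders (only fragments of the call appear in this diff); the guard simply skips attn_implementation="flash_attention_2" when the flash-attn package is not importable.

import importlib.util

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_id = "some-org/some-model"  # placeholder; the real repo id is not shown in this diff
hf_token = "hf_..."               # placeholder token

# 4-bit quantization because vRAM is not big enough (matches the comment in the diff)
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",            # auto-distributes to GPU
    token=hf_token,
    trust_remote_code=True,       # use custom code that isn't in base transformers yet
    quantization_config=quantization_config,
)

# Only request FlashAttention 2 when the flash-attn package is actually installed;
# otherwise let transformers fall back to its default attention implementation.
if importlib.util.find_spec("flash_attn") is not None:
    kwargs["attn_implementation"] = "flash_attention_2"

model = AutoModelForCausalLM.from_pretrained(model_id, **kwargs)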