keynes42 committed
Commit e38adab · verified · 1 Parent(s): fbd7ae6

Update app.py


Add flash attention

Files changed (1)
app.py (+1 -0)
app.py CHANGED
@@ -75,6 +75,7 @@ class BasicModel:
     model_id,
     torch_dtype=torch.float16,
     device_map="auto", ## auto-distributes to GPU
+    attn_implementation="flash_attention_2",
     token=hf_token,
     trust_remote_code=True, ## <- Use the custom code that isn't part of the base transformers library yet
     quantization_config=quantization_config ## <- Load 4-bit quantization because vRAM is not big enough
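For context, a minimal, self-contained sketch of what the full loading call looks like after this change. The model_id value, the token, and the BitsAndBytesConfig settings below are assumptions for illustration; the actual values live elsewhere in app.py and are not shown in this diff.

# Sketch of the loading call with flash attention enabled (assumed setup values).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_id = "some-org/some-model"   # hypothetical; app.py's real model_id is not shown in the diff
hf_token = "hf_..."                # placeholder for the real Hugging Face access token

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit quantization because vRAM is not big enough
    bnb_4bit_compute_dtype=torch.float16,  # assumed compute dtype; the real config is not shown
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",                        # auto-distributes to GPU
    attn_implementation="flash_attention_2",  # the line added by this commit
    token=hf_token,
    trust_remote_code=True,                   # use custom code not yet in base transformers
    quantization_config=quantization_config,  # load in 4-bit
)

Note that flash_attention_2 only works if the flash-attn package is installed and the GPU supports it (Ampere-class or newer); otherwise loading will fail and the attn_implementation argument should be dropped or set back to the default.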