multimodalart committed
Commit b4b0028 · verified · 1 Parent(s): 3979845

faster safety checking

Files changed (1)
  1. app.py +30 -1
app.py CHANGED
@@ -1,16 +1,45 @@
+import subprocess
+
+subprocess.run(
+    "pip install flash-attn --no-build-isolation", env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}, shell=True
+)
+
 import gradio as gr
 import spaces
 import torch
 from diffusers import Cosmos2TextToImagePipeline, EDMEulerScheduler
+from transformers import AutoModelForCausalLM, SiglipProcessor
 import random
 
+# Add flash_attention_2 to the safeguard model
+def patch_from_pretrained(cls):
+    orig_method = cls.from_pretrained
+
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("attn_implementation", "flash_attention_2")
+        kwargs.setdefault("torch_dtype", torch.bfloat16)
+        return orig_method(*args, **kwargs)
+
+    cls.from_pretrained = new_from_pretrained
+
+patch_from_pretrained(AutoModelForCausalLM)
+
+# Add a `use_fast` to the safeguard image processor
+def patch_processor_fast(cls):
+    orig_method = cls.from_pretrained
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("use_fast", True)
+        return orig_method(*args, **kwargs)
+    cls.from_pretrained = new_from_pretrained
+
+patch_processor_fast(SiglipProcessor)
+
 model_14b_id = "nvidia/Cosmos-Predict2-14B-Text2Image"
 
 pipe_14b = Cosmos2TextToImagePipeline.from_pretrained(
     model_14b_id,
     torch_dtype=torch.bfloat16
 )
-
 pipe_14b.to("cuda")
 
 @spaces.GPU(duration=140)
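
For reference, a minimal sketch (not part of the commit) of why the patches use kwargs.setdefault(): the wrapped from_pretrained only fills in attn_implementation, torch_dtype, and use_fast when the caller omits them, so the safeguard models the pipeline loads internally pick up the faster defaults, while any caller that passes these kwargs explicitly keeps its own values.

# Standalone sketch of the setdefault behaviour; `load` is a hypothetical
# stand-in for the patched from_pretrained, not part of the commit.
def load(**kwargs):
    kwargs.setdefault("attn_implementation", "flash_attention_2")
    kwargs.setdefault("use_fast", True)
    return kwargs

print(load())                            # both defaults are filled in
print(load(attn_implementation="sdpa"))  # an explicit choice is preserved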