Commit ac3f08e · 1 Parent(s): 686a471
No Nvidia GPU...
app.py
CHANGED
@@ -8,11 +8,11 @@ import gradio as gr
 #from IPython.display import display
 
 tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16)
-text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16).to("cuda")
+text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16).to("cpu")
 
 # Here we use a different VAE to the original release, which has been fine-tuned for more steps
-vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema", torch_dtype=torch.float16).to("cuda")
-unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet", torch_dtype=torch.float16).to("cuda")
+vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema", torch_dtype=torch.float16).to("cpu")
+unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet", torch_dtype=torch.float16).to("cpu")
 
 beta_start,beta_end = 0.00085,0.012
 height = 512
@@ -27,7 +27,7 @@ scheduler = LMSDiscreteScheduler(beta_start=beta_start, beta_end=beta_end, beta_
 def text_enc(prompts, maxlen=None):
     if maxlen is None: maxlen = tokenizer.model_max_length
     inp = tokenizer(prompts, padding="max_length", max_length=maxlen, truncation=True, return_tensors="pt")
-    return text_encoder(inp.input_ids.to("cuda"))[0].half()
+    return text_encoder(inp.input_ids.to("cpu"))[0].half()
 
 def do_both(prompts):
     def mk_img(t):
@@ -43,7 +43,7 @@ def do_both(prompts):
 
     latents = torch.randn((bs, unet.config.in_channels, height//8, width//8))
     scheduler.set_timesteps(steps)
-    latents = latents.to("cuda").half() * scheduler.init_noise_sigma
+    latents = latents.to("cpu").half() * scheduler.init_noise_sigma
 
     for i,ts in enumerate(tqdm(scheduler.timesteps)):
         inp = scheduler.scale_model_input(torch.cat([latents] * 2), ts)