markhristov committed on
Commit
2d82e67
·
1 Parent(s): cbc8ee5
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -8,11 +8,11 @@ import gradio as gr
8
  #from IPython.display import display
9
 
10
  tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16)
11
- text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16).float()
12
 
13
  # Here we use a different VAE to the original release, which has been fine-tuned for more steps
14
- vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema", torch_dtype=torch.float16).float()
15
- unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet", torch_dtype=torch.float16).float()
16
 
17
  beta_start,beta_end = 0.00085,0.012
18
  height = 512
@@ -27,7 +27,7 @@ scheduler = LMSDiscreteScheduler(beta_start=beta_start, beta_end=beta_end, beta_
27
  def text_enc(prompts, maxlen=None):
28
  if maxlen is None: maxlen = tokenizer.model_max_length
29
  inp = tokenizer(prompts, padding="max_length", max_length=maxlen, truncation=True, return_tensors="pt")
30
- return text_encoder(inp.input_ids.float())[0]
31
 
32
  def do_both(prompts):
33
  def mk_img(t):
 
8
  #from IPython.display import display
9
 
10
  tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16)
11
+ text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16)
12
 
13
  # Here we use a different VAE to the original release, which has been fine-tuned for more steps
14
+ vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-ema", torch_dtype=torch.float16)
15
+ unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet", torch_dtype=torch.float16)
16
 
17
  beta_start,beta_end = 0.00085,0.012
18
  height = 512
 
27
  def text_enc(prompts, maxlen=None):
28
  if maxlen is None: maxlen = tokenizer.model_max_length
29
  inp = tokenizer(prompts, padding="max_length", max_length=maxlen, truncation=True, return_tensors="pt")
30
+ return text_encoder(inp.input_ids.long())[0]
31
 
32
  def do_both(prompts):
33
  def mk_img(t):