Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import torch
|
|
6 |
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
|
7 |
from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
|
8 |
from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
|
|
|
9 |
|
10 |
dtype = torch.bfloat16
|
11 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -13,6 +14,8 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
13 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
14 |
good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=dtype).to(device)
|
15 |
pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype, vae=taef1).to(device)
|
|
|
|
|
16 |
torch.cuda.empty_cache()
|
17 |
|
18 |
MAX_SEED = np.iinfo(np.int32).max
|
|
|
6 |
from diffusers import DiffusionPipeline, FlowMatchEulerDiscreteScheduler, AutoencoderTiny, AutoencoderKL
|
7 |
from transformers import CLIPTextModel, CLIPTokenizer,T5EncoderModel, T5TokenizerFast
|
8 |
from live_preview_helpers import calculate_shift, retrieve_timesteps, flux_pipe_call_that_returns_an_iterable_of_images
|
9 |
+
from torchao.quantization.quant_api import Int8WeightOnlyConfig, quantize_
|
10 |
|
11 |
dtype = torch.bfloat16
|
12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
14 |
taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
|
15 |
good_vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=dtype).to(device)
|
16 |
pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype, vae=taef1).to(device)
|
17 |
+
quantize_(pipe.transformer, Int8WeightOnlyConfig())
|
18 |
+
|
19 |
torch.cuda.empty_cache()
|
20 |
|
21 |
MAX_SEED = np.iinfo(np.int32).max
|