MykosX commited on
Commit
a35db05
·
verified ·
1 Parent(s): 9fc66f0

uploaded model

Browse files
Files changed (36) hide show
  1. .gitattributes +4 -0
  2. LICENSE +15 -0
  3. README.md +223 -0
  4. feature_extractor/preprocessor_config.json +27 -0
  5. images/couple-having-fun-(from-generated)-300.jpg +0 -0
  6. images/couple-having-fun-(from-generated)-400.jpg +0 -0
  7. images/couple-having-fun-(from-generated)-500.jpg +0 -0
  8. images/couple-having-fun-(from-original)-300.jpg +3 -0
  9. images/couple-having-fun-(from-original)-400.jpg +3 -0
  10. images/couple-having-fun-(from-original)-500.jpg +3 -0
  11. images/couple-having-fun-(generated)-300.jpg +0 -0
  12. images/couple-having-fun-(generated)-400.jpg +0 -0
  13. images/couple-having-fun-(generated)-500.jpg +0 -0
  14. images/couple-having-fun-(original).jpg +3 -0
  15. images/girl-posing-photo-(from-generated)-300.jpg +0 -0
  16. images/girl-posing-photo-(from-generated)-400.jpg +0 -0
  17. images/girl-posing-photo-(from-generated)-500.jpg +0 -0
  18. images/girl-posing-photo-(from-original)-300.jpg +0 -0
  19. images/girl-posing-photo-(from-original)-400.jpg +0 -0
  20. images/girl-posing-photo-(from-original)-500.jpg +0 -0
  21. images/girl-posing-photo-(generated)-300.jpg +0 -0
  22. images/girl-posing-photo-(generated)-400.jpg +0 -0
  23. images/girl-posing-photo-(generated)-500.jpg +0 -0
  24. images/girl-posing-photo-(original).jpg +0 -0
  25. model_index.json +38 -0
  26. scheduler/scheduler_config.json +19 -0
  27. text_encoder/config.json +24 -0
  28. text_encoder/model.safetensors +3 -0
  29. tokenizer/merges.txt +0 -0
  30. tokenizer/special_tokens_map.json +30 -0
  31. tokenizer/tokenizer_config.json +31 -0
  32. tokenizer/vocab.json +0 -0
  33. unet/config.json +68 -0
  34. unet/diffusion_pytorch_model.safetensors +3 -0
  35. vae/config.json +38 -0
  36. vae/diffusion_pytorch_model.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ images/couple-having-fun-(from-original)-300.jpg filter=lfs diff=lfs merge=lfs -text
37
+ images/couple-having-fun-(from-original)-400.jpg filter=lfs diff=lfs merge=lfs -text
38
+ images/couple-having-fun-(from-original)-500.jpg filter=lfs diff=lfs merge=lfs -text
39
+ images/couple-having-fun-(original).jpg filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2025 MykosX
2
+
3
+ Permission is hereby granted, free of charge, to any person or entity obtaining a copy of this software and associated documentation files (the “Software”), to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, including the right to use the output and results produced by the Software for any purpose, commercial or non-commercial.
4
+
5
+ Conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ Modifications must be clearly marked as such.
10
+
11
+ Disclaimer:
12
+
13
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14
+
15
+ By using this code, you agree to these terms.
README.md ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - stable-diffusion
6
+ - text-to-image
7
+ - image-to-image
8
+ - photo
9
+ pipeline_tag: text-to-image
10
+ ---
11
+
12
+ # Leia Photo SD
13
+
14
+ `MykosX/leia-photo-sd` is a Stable Diffusion model that can be used both for:
15
+ - text-to-image: generates quite good photo images, but may produce distorted faces for people far from the viewer
16
+ - image-to-image: tends to improve the quality of images generated by this model, and does a good job on images from other models
17
+
18
+ ## Image show-case
19
+ <table>
20
+ <tr>
21
+ <th></th>
22
+ <th>(seed=300)**</th>
23
+ <th>(seed=400)**</th>
24
+ <th>(seed=500)**</th>
25
+ </tr>
26
+ <tr>
27
+ <td>
28
+ text-to-image
29
+ </td>
30
+ <td>
31
+ <img src="images/couple-having-fun-(generated)-300.jpg" width="600"/>
32
+ </td>
33
+ <td>
34
+ <img src="images/couple-having-fun-(generated)-400.jpg" width="600"/>
35
+ </td>
36
+ <td>
37
+ <img src="images/couple-having-fun-(generated)-500.jpg" width="600"/>
38
+ </td>
39
+ </tr>
40
+ <tr>
41
+ <td>
42
+ image-to-image
43
+ </td>
44
+ <td>
45
+ <img src="images/couple-having-fun-(from-generated)-300.jpg" width="600"/>
46
+ </td>
47
+ <td>
48
+ <img src="images/couple-having-fun-(from-generated)-400.jpg" width="600"/>
49
+ </td>
50
+ <td>
51
+ <img src="images/couple-having-fun-(from-generated)-500.jpg" width="600"/>
52
+ </td>
53
+ </tr>
54
+ </table>
55
+
56
+ <table>
57
+ <tr>
58
+ <th></th>
59
+ <th>(seed=300)**</th>
60
+ <th>(seed=400)**</th>
61
+ <th>(seed=500)**</th>
62
+ </tr>
63
+ <tr>
64
+ <td>
65
+ text-to-image
66
+ </td>
67
+ <td>
68
+ <img src="images/girl-posing-photo-(generated)-300.jpg" width="400"/>
69
+ </td>
70
+ <td>
71
+ <img src="images/girl-posing-photo-(generated)-400.jpg" width="400"/>
72
+ </td>
73
+ <td>
74
+ <img src="images/girl-posing-photo-(generated)-500.jpg" width="400"/>
75
+ </td>
76
+ </tr>
77
+ <tr>
78
+ <td>
79
+ image-to-image
80
+ </td>
81
+ <td>
82
+ <img src="images/girl-posing-photo-(from-generated)-300.jpg" width="400"/>
83
+ </td>
84
+ <td>
85
+ <img src="images/girl-posing-photo-(from-generated)-400.jpg" width="400"/>
86
+ </td>
87
+ <td>
88
+ <img src="images/girl-posing-photo-(from-generated)-500.jpg" width="400"/>
89
+ </td>
90
+ </tr>
91
+ </table>
92
+
93
+ <table>
94
+ <tr>
95
+ <th>Base image (from another model)</th>
96
+ <th>image-to-image (seed=300)**</th>
97
+ <th>image-to-image (seed=400)**</th>
98
+ <th>image-to-image (seed=500)**</th>
99
+ </tr>
100
+ <tr>
101
+ <td>
102
+ <img src="images/couple-having-fun-(original).jpg" width="600"/>
103
+ </td>
104
+ <td>
105
+ <img src="images/couple-having-fun-(from-original)-300.jpg" width="600"/>
106
+ </td>
107
+ <td>
108
+ <img src="images/couple-having-fun-(from-original)-400.jpg" width="600"/>
109
+ </td>
110
+ <td>
111
+ <img src="images/couple-having-fun-(from-original)-500.jpg" width="600"/>
112
+ </td>
113
+ </tr>
114
+ <tr>
115
+ <td>
116
+ <img src="images/girl-posing-photo-(original).jpg" width="600"/>
117
+ </td>
118
+ <td>
119
+ <img src="images/girl-posing-photo-(from-original)-300.jpg" width="600"/>
120
+ </td>
121
+ <td>
122
+ <img src="images/girl-posing-photo-(from-original)-400.jpg" width="600"/>
123
+ </td>
124
+ <td>
125
+ <img src="images/girl-posing-photo-(from-original)-500.jpg" width="600"/>
126
+ </td>
127
+ </tr>
128
+ <tr>
129
+ <th>Base image (from another model)</th>
130
+ <th>image-to-image (seed=300)**</th>
131
+ <th>image-to-image (seed=400)**</th>
132
+ <th>image-to-image (seed=500)**</th>
133
+ </tr>
134
+ </table>
135
+
136
+ ** using these defaults unless specified:
137
+ <table>
138
+ <tr>
139
+ <th>Setting</th>
140
+ <th>Default value</th>
141
+ </tr>
142
+ <tr>
143
+ <td>prompt (landscape)</td>
144
+ <td>landscape image, a boy and girl having fun on the beach</td>
145
+ </tr>
146
+ <tr>
147
+ <td>prompt (portrait)</td>
148
+ <td>portrait image, a girl in a nice dress posing for a photo</td>
149
+ </tr>
150
+ <tr>
151
+ <td>negative prompt</td>
152
+ <td>deformed iris, deformed pupils, bad anatomy, cloned face, extra arms, extra legs, missing fingers, too many fingers</td>
153
+ </tr>
154
+ <tr>
155
+ <td>size (landscape)</td>
156
+ <td>1024 x 768</td>
157
+ </tr>
158
+ <tr>
159
+ <td>size (portrait)</td>
160
+ <td> 768 x 1024</td>
161
+ </tr>
162
+ <tr>
163
+ <td>seed</td>
164
+ <td>300</td>
165
+ </tr>
166
+ <tr>
167
+ <td>guidance scale</td>
168
+ <td>12.0</td>
169
+ </tr>
170
+ <tr>
171
+ <td>strength</td>
172
+ <td>0.5</td>
173
+ </tr>
174
+ <tr>
175
+ <td>inference steps</td>
176
+ <td>30</td>
177
+ </tr>
178
+ </table>
179
+
180
+ ## Diffusers
181
+
182
+ For more general information on how to run text-to-image models with 🧨 Diffusers, see [the docs](https://huggingface.co/docs/diffusers/using-diffusers/conditional_image_generation).
183
+
184
+ 1. Installation
185
+
186
+ ```
187
+ pip install diffusers transformers accelerate
188
+ ```
189
+
190
+ 2. Running example for text-to-image generation
191
+ ```py
192
+ import torch
193
+
194
+ from diffusers import AutoPipelineForText2Image
195
+
196
+ pipe = AutoPipelineForText2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
197
+ pipe = pipe.to("cpu")
198
+
199
+ prompt = "portrait image, a girl in a nice dress posing for a photo"
200
+
201
+ image = pipe(prompt).images[0]
202
+ image.save("./images/text-to-image.png")
203
+ ```
204
+
205
+ 3. Running example for image-to-image generation
206
+ ```py
207
+ import torch
208
+
209
+ from diffusers import AutoPipelineForImage2Image
210
+ from PIL import Image
211
+
212
+ pipe = AutoPipelineForImage2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
213
+ pipe = pipe.to("cpu")
214
+
215
+ base_image = Image.open("./images/girl-posing-photo-(original).jpg")
216
+ prompt = "portrait image, a girl in a nice dress posing for a photo"
217
+
218
+ image = pipe(prompt, image=base_image).images[0]
219
+ image.save("./images/image-to-image.png")
220
+ ```
221
+
222
+ ## PS
223
+ Play with the model and don't hesitate to show off!
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 224
26
+ }
27
+ }
images/couple-having-fun-(from-generated)-300.jpg ADDED
images/couple-having-fun-(from-generated)-400.jpg ADDED
images/couple-having-fun-(from-generated)-500.jpg ADDED
images/couple-having-fun-(from-original)-300.jpg ADDED

Git LFS Details

  • SHA256: 7a9e42e538faf2ad53a5c6e745e8c121fab0ae7b00133929039e756c31d568d7
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
images/couple-having-fun-(from-original)-400.jpg ADDED

Git LFS Details

  • SHA256: ccea9ebaf889e8f48f1b66eb1b593a67275938103de63cb2b40c441c8695789d
  • Pointer size: 131 Bytes
  • Size of remote file: 126 kB
images/couple-having-fun-(from-original)-500.jpg ADDED

Git LFS Details

  • SHA256: 5cf0a7ee465d3d2010941f78cb42d5762ae3c6b4d8f855010522bf3541c7495b
  • Pointer size: 131 Bytes
  • Size of remote file: 117 kB
images/couple-having-fun-(generated)-300.jpg ADDED
images/couple-having-fun-(generated)-400.jpg ADDED
images/couple-having-fun-(generated)-500.jpg ADDED
images/couple-having-fun-(original).jpg ADDED

Git LFS Details

  • SHA256: f4b71e1ae245e54d14cd48ed58f8bf43071e87766322b16d534dad927f70b92a
  • Pointer size: 131 Bytes
  • Size of remote file: 123 kB
images/girl-posing-photo-(from-generated)-300.jpg ADDED
images/girl-posing-photo-(from-generated)-400.jpg ADDED
images/girl-posing-photo-(from-generated)-500.jpg ADDED
images/girl-posing-photo-(from-original)-300.jpg ADDED
images/girl-posing-photo-(from-original)-400.jpg ADDED
images/girl-posing-photo-(from-original)-500.jpg ADDED
images/girl-posing-photo-(generated)-300.jpg ADDED
images/girl-posing-photo-(generated)-400.jpg ADDED
images/girl-posing-photo-(generated)-500.jpg ADDED
images/girl-posing-photo-(original).jpg ADDED
model_index.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "d:\\tools\\ai\\mix\\mixx",
5
+ "feature_extractor": [
6
+ "transformers",
7
+ "CLIPImageProcessor"
8
+ ],
9
+ "image_encoder": [
10
+ null,
11
+ null
12
+ ],
13
+ "requires_safety_checker": false,
14
+ "safety_checker": [
15
+ null,
16
+ null
17
+ ],
18
+ "scheduler": [
19
+ "diffusers",
20
+ "EulerAncestralDiscreteScheduler"
21
+ ],
22
+ "text_encoder": [
23
+ "transformers",
24
+ "CLIPTextModel"
25
+ ],
26
+ "tokenizer": [
27
+ "transformers",
28
+ "CLIPTokenizer"
29
+ ],
30
+ "unet": [
31
+ "diffusers",
32
+ "UNet2DConditionModel"
33
+ ],
34
+ "vae": [
35
+ "diffusers",
36
+ "AutoencoderKL"
37
+ ]
38
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "EulerAncestralDiscreteScheduler",
3
+ "_diffusers_version": "0.33.1",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "interpolation_type": "linear",
9
+ "num_train_timesteps": 1000,
10
+ "prediction_type": "epsilon",
11
+ "rescale_betas_zero_snr": false,
12
+ "sample_max_value": 1.0,
13
+ "set_alpha_to_one": false,
14
+ "skip_prk_steps": true,
15
+ "steps_offset": 1,
16
+ "timestep_spacing": "leading",
17
+ "trained_betas": null,
18
+ "use_karras_sigmas": false
19
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "CLIPTextModel"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 0,
7
+ "dropout": 0.0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "quick_gelu",
10
+ "hidden_size": 768,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 77,
16
+ "model_type": "clip_text_model",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "projection_dim": 768,
21
+ "torch_dtype": "float16",
22
+ "transformers_version": "4.52.4",
23
+ "vocab_size": 49408
24
+ }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87cc8e6f1f4110d4fc7de62fee1af1450deef176a61c42741b7e22bd1449dbe3
3
+ size 246144152
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": true,
23
+ "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
25
+ "errors": "replace",
26
+ "extra_special_tokens": {},
27
+ "model_max_length": 77,
28
+ "pad_token": "<|endoftext|>",
29
+ "tokenizer_class": "CLIPTokenizer",
30
+ "unk_token": "<|endoftext|>"
31
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\unet",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "attention_type": "default",
11
+ "block_out_channels": [
12
+ 320,
13
+ 640,
14
+ 1280,
15
+ 1280
16
+ ],
17
+ "center_input_sample": false,
18
+ "class_embed_type": null,
19
+ "class_embeddings_concat": false,
20
+ "conv_in_kernel": 3,
21
+ "conv_out_kernel": 3,
22
+ "cross_attention_dim": 768,
23
+ "cross_attention_norm": null,
24
+ "down_block_types": [
25
+ "CrossAttnDownBlock2D",
26
+ "CrossAttnDownBlock2D",
27
+ "CrossAttnDownBlock2D",
28
+ "DownBlock2D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dropout": 0.0,
32
+ "dual_cross_attention": false,
33
+ "encoder_hid_dim": null,
34
+ "encoder_hid_dim_type": null,
35
+ "flip_sin_to_cos": true,
36
+ "freq_shift": 0,
37
+ "in_channels": 4,
38
+ "layers_per_block": 2,
39
+ "mid_block_only_cross_attention": null,
40
+ "mid_block_scale_factor": 1,
41
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
42
+ "norm_eps": 1e-05,
43
+ "norm_num_groups": 32,
44
+ "num_attention_heads": null,
45
+ "num_class_embeds": null,
46
+ "only_cross_attention": false,
47
+ "out_channels": 4,
48
+ "projection_class_embeddings_input_dim": null,
49
+ "resnet_out_scale_factor": 1.0,
50
+ "resnet_skip_time_act": false,
51
+ "resnet_time_scale_shift": "default",
52
+ "reverse_transformer_layers_per_block": null,
53
+ "sample_size": 64,
54
+ "time_cond_proj_dim": null,
55
+ "time_embedding_act_fn": null,
56
+ "time_embedding_dim": null,
57
+ "time_embedding_type": "positional",
58
+ "timestep_post_act": null,
59
+ "transformer_layers_per_block": 1,
60
+ "up_block_types": [
61
+ "UpBlock2D",
62
+ "CrossAttnUpBlock2D",
63
+ "CrossAttnUpBlock2D",
64
+ "CrossAttnUpBlock2D"
65
+ ],
66
+ "upcast_attention": false,
67
+ "use_linear_projection": false
68
+ }
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73e60356a82ffeacf57bf509bdee51ffd77a7d1ac1ae017db2fe33730f86454
3
+ size 1719125304
vae/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.33.1",
4
+ "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": true,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "latents_mean": null,
22
+ "latents_std": null,
23
+ "layers_per_block": 2,
24
+ "mid_block_add_attention": true,
25
+ "norm_num_groups": 32,
26
+ "out_channels": 3,
27
+ "sample_size": 512,
28
+ "scaling_factor": 0.18215,
29
+ "shift_factor": null,
30
+ "up_block_types": [
31
+ "UpDecoderBlock2D",
32
+ "UpDecoderBlock2D",
33
+ "UpDecoderBlock2D",
34
+ "UpDecoderBlock2D"
35
+ ],
36
+ "use_post_quant_conv": true,
37
+ "use_quant_conv": true
38
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94703a575c1a261653520bf56327b536a1da3f68cb0e420a52d46ffe8d45167c
3
+ size 167335342