uploaded model
- .gitattributes +4 -0
- LICENSE +15 -0
- README.md +223 -0
- feature_extractor/preprocessor_config.json +27 -0
- images/couple-having-fun-(from-generated)-300.jpg +0 -0
- images/couple-having-fun-(from-generated)-400.jpg +0 -0
- images/couple-having-fun-(from-generated)-500.jpg +0 -0
- images/couple-having-fun-(from-original)-300.jpg +3 -0
- images/couple-having-fun-(from-original)-400.jpg +3 -0
- images/couple-having-fun-(from-original)-500.jpg +3 -0
- images/couple-having-fun-(generated)-300.jpg +0 -0
- images/couple-having-fun-(generated)-400.jpg +0 -0
- images/couple-having-fun-(generated)-500.jpg +0 -0
- images/couple-having-fun-(original).jpg +3 -0
- images/girl-posing-photo-(from-generated)-300.jpg +0 -0
- images/girl-posing-photo-(from-generated)-400.jpg +0 -0
- images/girl-posing-photo-(from-generated)-500.jpg +0 -0
- images/girl-posing-photo-(from-original)-300.jpg +0 -0
- images/girl-posing-photo-(from-original)-400.jpg +0 -0
- images/girl-posing-photo-(from-original)-500.jpg +0 -0
- images/girl-posing-photo-(generated)-300.jpg +0 -0
- images/girl-posing-photo-(generated)-400.jpg +0 -0
- images/girl-posing-photo-(generated)-500.jpg +0 -0
- images/girl-posing-photo-(original).jpg +0 -0
- model_index.json +38 -0
- scheduler/scheduler_config.json +19 -0
- text_encoder/config.json +24 -0
- text_encoder/model.safetensors +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +30 -0
- tokenizer/tokenizer_config.json +31 -0
- tokenizer/vocab.json +0 -0
- unet/config.json +68 -0
- unet/diffusion_pytorch_model.safetensors +3 -0
- vae/config.json +38 -0
- vae/diffusion_pytorch_model.safetensors +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-300.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-400.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-500.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(original).jpg filter=lfs diff=lfs merge=lfs -text
LICENSE
ADDED
@@ -0,0 +1,15 @@
Copyright (c) 2025 MykosX

Permission is hereby granted, free of charge, to any person or entity obtaining a copy of this software and associated documentation files (the “Software”), to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, including the right to use the output and results produced by the Software for any purpose, commercial or non-commercial.

Conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

Modifications must be clearly marked as such.

Disclaimer:

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

By using this code, you agree to these terms.
README.md
ADDED
@@ -0,0 +1,223 @@
---
language:
- en
tags:
- stable-diffusion
- text-to-image
- image-to-image
- photo
pipeline_tag: text-to-image
---

# Leia photo sd

`MykosX/leia-photo-sd` is a Stable Diffusion model that can be used for both:
- text-to-image: generates good-quality photo images, though it may produce distorted faces for people far from the viewer
- image-to-image: tends to improve the quality of images generated by this model, and does a good job on images from other models

## Image showcase
<table>
  <tr>
    <th></th>
    <th>(seed=300)**</th>
    <th>(seed=400)**</th>
    <th>(seed=500)**</th>
  </tr>
  <tr>
    <td>text-to-image</td>
    <td><img src="images/couple-having-fun-(generated)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(generated)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(generated)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <td>image-to-image</td>
    <td><img src="images/couple-having-fun-(from-generated)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-generated)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-generated)-500.jpg" width="600"/></td>
  </tr>
</table>

<table>
  <tr>
    <th></th>
    <th>(seed=300)**</th>
    <th>(seed=400)**</th>
    <th>(seed=500)**</th>
  </tr>
  <tr>
    <td>text-to-image</td>
    <td><img src="images/girl-posing-photo-(generated)-300.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(generated)-400.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(generated)-500.jpg" width="400"/></td>
  </tr>
  <tr>
    <td>image-to-image</td>
    <td><img src="images/girl-posing-photo-(from-generated)-300.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(from-generated)-400.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(from-generated)-500.jpg" width="400"/></td>
  </tr>
</table>

<table>
  <tr>
    <th>Base image (from another model)</th>
    <th>image-to-image (seed=300)**</th>
    <th>image-to-image (seed=400)**</th>
    <th>image-to-image (seed=500)**</th>
  </tr>
  <tr>
    <td><img src="images/couple-having-fun-(original).jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <td><img src="images/girl-posing-photo-(original).jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-300.jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-400.jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <th>Base image (from another model)</th>
    <th>image-to-image (seed=300)**</th>
    <th>image-to-image (seed=400)**</th>
    <th>image-to-image (seed=500)**</th>
  </tr>
</table>

** using these defaults unless specified:
<table>
  <tr>
    <th>Setting</th>
    <th>Default value</th>
  </tr>
  <tr>
    <td>prompt (landscape)</td>
    <td>landscape image, a boy and girl having fun on the beach</td>
  </tr>
  <tr>
    <td>prompt (portrait)</td>
    <td>portrait image, a girl in a nice dress posing for a photo</td>
  </tr>
  <tr>
    <td>negative prompt</td>
    <td>deformed iris, deformed pupils, bad anatomy, cloned face, extra arms, extra legs, missing fingers, too many fingers</td>
  </tr>
  <tr>
    <td>size (landscape)</td>
    <td>1024 x 768</td>
  </tr>
  <tr>
    <td>size (portrait)</td>
    <td>768 x 1024</td>
  </tr>
  <tr>
    <td>seed</td>
    <td>300</td>
  </tr>
  <tr>
    <td>guidance scale</td>
    <td>12.0</td>
  </tr>
  <tr>
    <td>strength</td>
    <td>0.5</td>
  </tr>
  <tr>
    <td>inference steps</td>
    <td>30</td>
  </tr>
</table>

## Diffusers

For more general information on how to run text-to-image models with 🧨 Diffusers, see [the docs](https://huggingface.co/docs/diffusers/using-diffusers/conditional_image_generation).

1. Installation

```
pip install diffusers transformers accelerate
```

2. Running example for text-to-image generation
```py
import torch

from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

prompt = "portrait image, a girl in a nice dress posing for a photo"

image = pipe(prompt).images[0]
image.save("./images/text-to-image.png")
```
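
The showcase images above were generated with the defaults from the settings table rather than a bare `pipe(prompt)` call. Here is a minimal sketch of passing those settings explicitly; the keyword arguments are standard Stable Diffusion pipeline parameters, and the output filename is only illustrative:

```py
import torch

from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

# Defaults from the table above: portrait size 768 x 1024, seed 300,
# guidance scale 12.0, 30 inference steps, and the shared negative prompt.
generator = torch.Generator("cpu").manual_seed(300)
image = pipe(
    "portrait image, a girl in a nice dress posing for a photo",
    negative_prompt="deformed iris, deformed pupils, bad anatomy, cloned face, extra arms, extra legs, missing fingers, too many fingers",
    width=768,
    height=1024,
    guidance_scale=12.0,
    num_inference_steps=30,
    generator=generator,
).images[0]
image.save("./images/text-to-image-seed-300.png")
```
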
3. Running example for image-to-image generation
```py
import torch

from diffusers import AutoPipelineForImage2Image
from PIL import Image

pipe = AutoPipelineForImage2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

base_image = Image.open("./images/girl-posing-photo-(original).jpg")
prompt = "portrait image, a girl in a nice dress posing for a photo"

image = pipe(prompt, image=base_image).images[0]
image.save("./images/image-to-image.png")
```
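
The image-to-image showcase rows use the table's strength default of 0.5; `strength` controls how far the pipeline may drift from the base image (values near 0.0 return it almost unchanged, 1.0 essentially ignores it). A small sketch, continuing from the snippet above, that passes strength and a fixed seed explicitly; these are standard pipeline arguments and the filename is again only illustrative:

```py
# Continuing from the image-to-image example above: apply the table defaults.
generator = torch.Generator("cpu").manual_seed(300)
image = pipe(
    prompt,
    image=base_image,
    strength=0.5,            # how strongly to repaint the base image
    guidance_scale=12.0,
    num_inference_steps=30,  # with strength=0.5, roughly half of these actually run
    generator=generator,
).images[0]
image.save("./images/image-to-image-seed-300.png")
```
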
## PS
Play with the model and don't hesitate to show off your results.
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
{
  "crop_size": {
    "height": 224,
    "width": 224
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "CLIPImageProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 224
  }
}
images/couple-having-fun-(from-generated)-300.jpg
ADDED
images/couple-having-fun-(from-generated)-400.jpg
ADDED
images/couple-having-fun-(from-generated)-500.jpg
ADDED
images/couple-having-fun-(from-original)-300.jpg
ADDED
Git LFS Details
images/couple-having-fun-(from-original)-400.jpg
ADDED
Git LFS Details
images/couple-having-fun-(from-original)-500.jpg
ADDED
Git LFS Details
images/couple-having-fun-(generated)-300.jpg
ADDED
images/couple-having-fun-(generated)-400.jpg
ADDED
images/couple-having-fun-(generated)-500.jpg
ADDED
images/couple-having-fun-(original).jpg
ADDED
Git LFS Details
images/girl-posing-photo-(from-generated)-300.jpg
ADDED
images/girl-posing-photo-(from-generated)-400.jpg
ADDED
images/girl-posing-photo-(from-generated)-500.jpg
ADDED
images/girl-posing-photo-(from-original)-300.jpg
ADDED
images/girl-posing-photo-(from-original)-400.jpg
ADDED
images/girl-posing-photo-(from-original)-500.jpg
ADDED
images/girl-posing-photo-(generated)-300.jpg
ADDED
images/girl-posing-photo-(generated)-400.jpg
ADDED
images/girl-posing-photo-(generated)-500.jpg
ADDED
images/girl-posing-photo-(original).jpg
ADDED
model_index.json
ADDED
@@ -0,0 +1,38 @@
{
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "image_encoder": [
    null,
    null
  ],
  "requires_safety_checker": false,
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "EulerAncestralDiscreteScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}
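
A brief, hedged note on what this index means in practice: `_class_name` is `StableDiffusionPipeline` and both safety-checker entries are null with `"requires_safety_checker": false`, so the repository can also be loaded with the concrete pipeline class instead of the `AutoPipelineFor*` helpers shown in the README:

```py
from diffusers import StableDiffusionPipeline

# model_index.json tells diffusers which class to build and resolves each
# component (scheduler, text_encoder, tokenizer, unet, vae) from its
# subfolder; the safety checker is simply absent here.
pipe = StableDiffusionPipeline.from_pretrained("MykosX/leia-photo-sd")
```
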
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,19 @@
{
  "_class_name": "EulerAncestralDiscreteScheduler",
  "_diffusers_version": "0.33.1",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "clip_sample": false,
  "interpolation_type": "linear",
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "rescale_betas_zero_snr": false,
  "sample_max_value": 1.0,
  "set_alpha_to_one": false,
  "skip_prk_steps": true,
  "steps_offset": 1,
  "timestep_spacing": "leading",
  "trained_betas": null,
  "use_karras_sigmas": false
}
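
One hedged aside: because the scheduler is stored as an ordinary diffusers config, it can be inspected or reconfigured independently of the pipeline; `from_pretrained(..., subfolder=...)` and `from_config` are the standard diffusers APIs for this:

```py
from diffusers import AutoPipelineForText2Image, EulerAncestralDiscreteScheduler

# Load only this repository's scheduler configuration.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "MykosX/leia-photo-sd", subfolder="scheduler"
)

# Or rebuild it with an overridden option, e.g. enabling the
# use_karras_sigmas flag that the config above leaves at false.
pipe = AutoPipelineForText2Image.from_pretrained("MykosX/leia-photo-sd")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
```
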
text_encoder/config.json
ADDED
@@ -0,0 +1,24 @@
{
  "architectures": [
    "CLIPTextModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "dropout": 0.0,
  "eos_token_id": 2,
  "hidden_act": "quick_gelu",
  "hidden_size": 768,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 77,
  "model_type": "clip_text_model",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "projection_dim": 768,
  "torch_dtype": "float16",
  "transformers_version": "4.52.4",
  "vocab_size": 49408
}
text_encoder/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87cc8e6f1f4110d4fc7de62fee1af1450deef176a61c42741b7e22bd1449dbe3
size 246144152
tokenizer/merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}
tokenizer/vocab.json
ADDED
The diff for this file is too large to render. See raw diff.
unet/config.json
ADDED
@@ -0,0 +1,68 @@
{
  "_class_name": "UNet2DConditionModel",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\unet",
  "act_fn": "silu",
  "addition_embed_type": null,
  "addition_embed_type_num_heads": 64,
  "addition_time_embed_dim": null,
  "attention_head_dim": 8,
  "attention_type": "default",
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "center_input_sample": false,
  "class_embed_type": null,
  "class_embeddings_concat": false,
  "conv_in_kernel": 3,
  "conv_out_kernel": 3,
  "cross_attention_dim": 768,
  "cross_attention_norm": null,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "dropout": 0.0,
  "dual_cross_attention": false,
  "encoder_hid_dim": null,
  "encoder_hid_dim_type": null,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 4,
  "layers_per_block": 2,
  "mid_block_only_cross_attention": null,
  "mid_block_scale_factor": 1,
  "mid_block_type": "UNetMidBlock2DCrossAttn",
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_attention_heads": null,
  "num_class_embeds": null,
  "only_cross_attention": false,
  "out_channels": 4,
  "projection_class_embeddings_input_dim": null,
  "resnet_out_scale_factor": 1.0,
  "resnet_skip_time_act": false,
  "resnet_time_scale_shift": "default",
  "reverse_transformer_layers_per_block": null,
  "sample_size": 64,
  "time_cond_proj_dim": null,
  "time_embedding_act_fn": null,
  "time_embedding_dim": null,
  "time_embedding_type": "positional",
  "timestep_post_act": null,
  "transformer_layers_per_block": 1,
  "up_block_types": [
    "UpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D"
  ],
  "upcast_attention": false,
  "use_linear_projection": false
}
unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e73e60356a82ffeacf57bf509bdee51ffd77a7d1ac1ae017db2fe33730f86454
size 1719125304
vae/config.json
ADDED
@@ -0,0 +1,38 @@
{
  "_class_name": "AutoencoderKL",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\vae",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "down_block_types": [
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D"
  ],
  "force_upcast": true,
  "in_channels": 3,
  "latent_channels": 4,
  "latents_mean": null,
  "latents_std": null,
  "layers_per_block": 2,
  "mid_block_add_attention": true,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 512,
  "scaling_factor": 0.18215,
  "shift_factor": null,
  "up_block_types": [
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D"
  ],
  "use_post_quant_conv": true,
  "use_quant_conv": true
}
vae/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:94703a575c1a261653520bf56327b536a1da3f68cb0e420a52d46ffe8d45167c
size 167335342