uploaded model
- .gitattributes +4 -0
- LICENSE +15 -0
- README.md +223 -0
- feature_extractor/preprocessor_config.json +27 -0
- images/couple-having-fun-(from-generated)-300.jpg +0 -0
- images/couple-having-fun-(from-generated)-400.jpg +0 -0
- images/couple-having-fun-(from-generated)-500.jpg +0 -0
- images/couple-having-fun-(from-original)-300.jpg +3 -0
- images/couple-having-fun-(from-original)-400.jpg +3 -0
- images/couple-having-fun-(from-original)-500.jpg +3 -0
- images/couple-having-fun-(generated)-300.jpg +0 -0
- images/couple-having-fun-(generated)-400.jpg +0 -0
- images/couple-having-fun-(generated)-500.jpg +0 -0
- images/couple-having-fun-(original).jpg +3 -0
- images/girl-posing-photo-(from-generated)-300.jpg +0 -0
- images/girl-posing-photo-(from-generated)-400.jpg +0 -0
- images/girl-posing-photo-(from-generated)-500.jpg +0 -0
- images/girl-posing-photo-(from-original)-300.jpg +0 -0
- images/girl-posing-photo-(from-original)-400.jpg +0 -0
- images/girl-posing-photo-(from-original)-500.jpg +0 -0
- images/girl-posing-photo-(generated)-300.jpg +0 -0
- images/girl-posing-photo-(generated)-400.jpg +0 -0
- images/girl-posing-photo-(generated)-500.jpg +0 -0
- images/girl-posing-photo-(original).jpg +0 -0
- model_index.json +38 -0
- scheduler/scheduler_config.json +19 -0
- text_encoder/config.json +24 -0
- text_encoder/model.safetensors +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +30 -0
- tokenizer/tokenizer_config.json +31 -0
- tokenizer/vocab.json +0 -0
- unet/config.json +68 -0
- unet/diffusion_pytorch_model.safetensors +3 -0
- vae/config.json +38 -0
- vae/diffusion_pytorch_model.safetensors +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-300.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-400.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(from-original)-500.jpg filter=lfs diff=lfs merge=lfs -text
+images/couple-having-fun-(original).jpg filter=lfs diff=lfs merge=lfs -text
LICENSE
ADDED
@@ -0,0 +1,15 @@
Copyright (c) 2025 MykosX

Permission is hereby granted, free of charge, to any person or entity obtaining a copy of this software and associated documentation files (the “Software”), to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, including the right to use the output and results produced by the Software for any purpose, commercial or non-commercial.

Conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

Modifications must be clearly marked as such.

Disclaimer:

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

By using this code, you agree to these terms.
README.md
ADDED
@@ -0,0 +1,223 @@
---
language:
- en
tags:
- stable-diffusion
- text-to-image
- image-to-image
- photo
pipeline_tag: text-to-image
---

# Leia photo sd

`MykosX/leia-photo-sd` is a Stable Diffusion model that can be used for both:
- text-to-image: generates good-quality photo images, though it may produce distorted faces for people far from the viewer
- image-to-image: tends to improve the quality of images generated by this model, and does a good job on images from other models

## Image showcase
<table>
  <tr>
    <th></th>
    <th>(seed=300)**</th>
    <th>(seed=400)**</th>
    <th>(seed=500)**</th>
  </tr>
  <tr>
    <td>text-to-image</td>
    <td><img src="images/couple-having-fun-(generated)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(generated)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(generated)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <td>image-to-image</td>
    <td><img src="images/couple-having-fun-(from-generated)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-generated)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-generated)-500.jpg" width="600"/></td>
  </tr>
</table>

<table>
  <tr>
    <th></th>
    <th>(seed=300)**</th>
    <th>(seed=400)**</th>
    <th>(seed=500)**</th>
  </tr>
  <tr>
    <td>text-to-image</td>
    <td><img src="images/girl-posing-photo-(generated)-300.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(generated)-400.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(generated)-500.jpg" width="400"/></td>
  </tr>
  <tr>
    <td>image-to-image</td>
    <td><img src="images/girl-posing-photo-(from-generated)-300.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(from-generated)-400.jpg" width="400"/></td>
    <td><img src="images/girl-posing-photo-(from-generated)-500.jpg" width="400"/></td>
  </tr>
</table>

<table>
  <tr>
    <th>Base image (from another model)</th>
    <th>image-to-image (seed=300)**</th>
    <th>image-to-image (seed=400)**</th>
    <th>image-to-image (seed=500)**</th>
  </tr>
  <tr>
    <td><img src="images/couple-having-fun-(original).jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-300.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-400.jpg" width="600"/></td>
    <td><img src="images/couple-having-fun-(from-original)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <td><img src="images/girl-posing-photo-(original).jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-300.jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-400.jpg" width="600"/></td>
    <td><img src="images/girl-posing-photo-(from-original)-500.jpg" width="600"/></td>
  </tr>
  <tr>
    <th>Base image (from another model)</th>
    <th>image-to-image (seed=300)**</th>
    <th>image-to-image (seed=400)**</th>
    <th>image-to-image (seed=500)**</th>
  </tr>
</table>

** using these defaults unless specified:
<table>
  <tr>
    <th>Setting</th>
    <th>Default value</th>
  </tr>
  <tr>
    <td>prompt (landscape)</td>
    <td>landscape image, a boy and girl having fun on the beach</td>
  </tr>
  <tr>
    <td>prompt (portrait)</td>
    <td>portrait image, a girl in a nice dress posing for a photo</td>
  </tr>
  <tr>
    <td>negative prompt</td>
    <td>deformed iris, deformed pupils, bad anatomy, cloned face, extra arms, extra legs, missing fingers, too many fingers</td>
  </tr>
  <tr>
    <td>size (landscape)</td>
    <td>1024 x 768</td>
  </tr>
  <tr>
    <td>size (portrait)</td>
    <td>768 x 1024</td>
  </tr>
  <tr>
    <td>seed</td>
    <td>300</td>
  </tr>
  <tr>
    <td>guidance scale</td>
    <td>12.0</td>
  </tr>
  <tr>
    <td>strength</td>
    <td>0.5</td>
  </tr>
  <tr>
    <td>inference steps</td>
    <td>30</td>
  </tr>
</table>

## Diffusers

For more general information on how to run text-to-image models with 🧨 Diffusers, see [the docs](https://huggingface.co/docs/diffusers/using-diffusers/conditional_image_generation).

1. Installation

```
pip install diffusers transformers accelerate
```

2. Running example for text-to-image generation
```py
import torch

from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

prompt = "portrait image, a girl in a nice dress posing for a photo"

image = pipe(prompt).images[0]
image.save("./images/text-to-image.png")
```
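
The showcase images above were generated with the defaults from the settings table rather than a bare `pipe(prompt)` call. Here is a minimal sketch of passing those settings explicitly; the keyword arguments are standard Stable Diffusion pipeline parameters, and the output filename is only illustrative:

```py
import torch

from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

# Defaults from the table above: portrait size 768 x 1024, seed 300,
# guidance scale 12.0, 30 inference steps, and the shared negative prompt.
generator = torch.Generator("cpu").manual_seed(300)
image = pipe(
    "portrait image, a girl in a nice dress posing for a photo",
    negative_prompt="deformed iris, deformed pupils, bad anatomy, cloned face, extra arms, extra legs, missing fingers, too many fingers",
    width=768,
    height=1024,
    guidance_scale=12.0,
    num_inference_steps=30,
    generator=generator,
).images[0]
image.save("./images/text-to-image-seed-300.png")
```
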
3. Running example for image-to-image generation
```py
import torch

from diffusers import AutoPipelineForImage2Image
from PIL import Image

pipe = AutoPipelineForImage2Image.from_pretrained('MykosX/leia-photo-sd', torch_dtype=torch.float32)
pipe = pipe.to("cpu")

base_image = Image.open("./images/girl-posing-photo-(original).jpg")
prompt = "portrait image, a girl in a nice dress posing for a photo"

image = pipe(prompt, image=base_image).images[0]
image.save("./images/image-to-image.png")
```
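
The image-to-image showcase rows use the table's strength default of 0.5; `strength` controls how far the pipeline may drift from the base image (values near 0.0 return it almost unchanged, 1.0 essentially ignores it). A small sketch, continuing from the snippet above, that passes strength and a fixed seed explicitly; these are standard pipeline arguments and the filename is again only illustrative:

```py
# Continuing from the image-to-image example above: apply the table defaults.
generator = torch.Generator("cpu").manual_seed(300)
image = pipe(
    prompt,
    image=base_image,
    strength=0.5,            # how strongly to repaint the base image
    guidance_scale=12.0,
    num_inference_steps=30,  # with strength=0.5, roughly half of these actually run
    generator=generator,
).images[0]
image.save("./images/image-to-image-seed-300.png")
```
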
## PS
Play with the model and don't hesitate to show off your results.
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
{
  "crop_size": {
    "height": 224,
    "width": 224
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "CLIPImageProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 224
  }
}
images/couple-having-fun-(from-generated)-300.jpg
ADDED
images/couple-having-fun-(from-generated)-400.jpg
ADDED
images/couple-having-fun-(from-generated)-500.jpg
ADDED
images/couple-having-fun-(from-original)-300.jpg
ADDED
Git LFS Details
images/couple-having-fun-(from-original)-400.jpg
ADDED
Git LFS Details
images/couple-having-fun-(from-original)-500.jpg
ADDED
Git LFS Details
images/couple-having-fun-(generated)-300.jpg
ADDED
images/couple-having-fun-(generated)-400.jpg
ADDED
images/couple-having-fun-(generated)-500.jpg
ADDED
images/couple-having-fun-(original).jpg
ADDED
Git LFS Details
images/girl-posing-photo-(from-generated)-300.jpg
ADDED
images/girl-posing-photo-(from-generated)-400.jpg
ADDED
images/girl-posing-photo-(from-generated)-500.jpg
ADDED
images/girl-posing-photo-(from-original)-300.jpg
ADDED
images/girl-posing-photo-(from-original)-400.jpg
ADDED
images/girl-posing-photo-(from-original)-500.jpg
ADDED
images/girl-posing-photo-(generated)-300.jpg
ADDED
images/girl-posing-photo-(generated)-400.jpg
ADDED
images/girl-posing-photo-(generated)-500.jpg
ADDED
images/girl-posing-photo-(original).jpg
ADDED
model_index.json
ADDED
@@ -0,0 +1,38 @@
{
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "image_encoder": [
    null,
    null
  ],
  "requires_safety_checker": false,
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "EulerAncestralDiscreteScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}
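
A brief, hedged note on what this index means in practice: `_class_name` is `StableDiffusionPipeline` and both safety-checker entries are null with `"requires_safety_checker": false`, so the repository can also be loaded with the concrete pipeline class instead of the `AutoPipelineFor*` helpers shown in the README:

```py
from diffusers import StableDiffusionPipeline

# model_index.json tells diffusers which class to build and resolves each
# component (scheduler, text_encoder, tokenizer, unet, vae) from its
# subfolder; the safety checker is simply absent here.
pipe = StableDiffusionPipeline.from_pretrained("MykosX/leia-photo-sd")
```
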
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,19 @@
{
  "_class_name": "EulerAncestralDiscreteScheduler",
  "_diffusers_version": "0.33.1",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "clip_sample": false,
  "interpolation_type": "linear",
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "rescale_betas_zero_snr": false,
  "sample_max_value": 1.0,
  "set_alpha_to_one": false,
  "skip_prk_steps": true,
  "steps_offset": 1,
  "timestep_spacing": "leading",
  "trained_betas": null,
  "use_karras_sigmas": false
}
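
One hedged aside: because the scheduler is stored as an ordinary diffusers config, it can be inspected or reconfigured independently of the pipeline; `from_pretrained(..., subfolder=...)` and `from_config` are the standard diffusers APIs for this:

```py
from diffusers import AutoPipelineForText2Image, EulerAncestralDiscreteScheduler

# Load only this repository's scheduler configuration.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "MykosX/leia-photo-sd", subfolder="scheduler"
)

# Or rebuild it with an overridden option, e.g. enabling the
# use_karras_sigmas flag that the config above leaves at false.
pipe = AutoPipelineForText2Image.from_pretrained("MykosX/leia-photo-sd")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
```
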
text_encoder/config.json
ADDED
@@ -0,0 +1,24 @@
{
  "architectures": [
    "CLIPTextModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "dropout": 0.0,
  "eos_token_id": 2,
  "hidden_act": "quick_gelu",
  "hidden_size": 768,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 77,
  "model_type": "clip_text_model",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "projection_dim": 768,
  "torch_dtype": "float16",
  "transformers_version": "4.52.4",
  "vocab_size": 49408
}
text_encoder/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87cc8e6f1f4110d4fc7de62fee1af1450deef176a61c42741b7e22bd1449dbe3
size 246144152
tokenizer/merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "49406": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49407": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": "<|endoftext|>"
}
tokenizer/vocab.json
ADDED
The diff for this file is too large to render. See raw diff.
unet/config.json
ADDED
@@ -0,0 +1,68 @@
{
  "_class_name": "UNet2DConditionModel",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\unet",
  "act_fn": "silu",
  "addition_embed_type": null,
  "addition_embed_type_num_heads": 64,
  "addition_time_embed_dim": null,
  "attention_head_dim": 8,
  "attention_type": "default",
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "center_input_sample": false,
  "class_embed_type": null,
  "class_embeddings_concat": false,
  "conv_in_kernel": 3,
  "conv_out_kernel": 3,
  "cross_attention_dim": 768,
  "cross_attention_norm": null,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "dropout": 0.0,
  "dual_cross_attention": false,
  "encoder_hid_dim": null,
  "encoder_hid_dim_type": null,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 4,
  "layers_per_block": 2,
  "mid_block_only_cross_attention": null,
  "mid_block_scale_factor": 1,
  "mid_block_type": "UNetMidBlock2DCrossAttn",
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_attention_heads": null,
  "num_class_embeds": null,
  "only_cross_attention": false,
  "out_channels": 4,
  "projection_class_embeddings_input_dim": null,
  "resnet_out_scale_factor": 1.0,
  "resnet_skip_time_act": false,
  "resnet_time_scale_shift": "default",
  "reverse_transformer_layers_per_block": null,
  "sample_size": 64,
  "time_cond_proj_dim": null,
  "time_embedding_act_fn": null,
  "time_embedding_dim": null,
  "time_embedding_type": "positional",
  "timestep_post_act": null,
  "transformer_layers_per_block": 1,
  "up_block_types": [
    "UpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D"
  ],
  "upcast_attention": false,
  "use_linear_projection": false
}
unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e73e60356a82ffeacf57bf509bdee51ffd77a7d1ac1ae017db2fe33730f86454
size 1719125304
vae/config.json
ADDED
@@ -0,0 +1,38 @@
{
  "_class_name": "AutoencoderKL",
  "_diffusers_version": "0.33.1",
  "_name_or_path": "d:\\tools\\ai\\mix\\mixx\\vae",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "down_block_types": [
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D"
  ],
  "force_upcast": true,
  "in_channels": 3,
  "latent_channels": 4,
  "latents_mean": null,
  "latents_std": null,
  "layers_per_block": 2,
  "mid_block_add_attention": true,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 512,
  "scaling_factor": 0.18215,
  "shift_factor": null,
  "up_block_types": [
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D"
  ],
  "use_post_quant_conv": true,
  "use_quant_conv": true
}
vae/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:94703a575c1a261653520bf56327b536a1da3f68cb0e420a52d46ffe8d45167c
size 167335342