Spaces:

rbanfield
/

ControlNetV1.1

Runtime error

File size: 7,802 Bytes

cc579f2

#!/usr/bin/env python

import cv2
import numpy as np
import torch
import random
import base64
import json
import threading
import uuid
import math

import io
from PIL import Image

from diffusers import AutoencoderKL, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler,StableDiffusionControlNetImg2ImgPipeline,StableDiffusionXLControlNetPipeline,DiffusionPipeline
from diffusers.utils import load_image
from transformers import pipeline

import gradio as gr

# Module-level model setup: every pipeline below is loaded once at import time
# in float16 and reused by the process_* functions further down.

# Standalone fp16 VAE checkpoint.
# NOTE(review): `vae` is not passed to any pipeline in the visible code —
# confirm whether it is still needed before removing it.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)


# Text-to-image pipeline conditioned by the canny ControlNet; called by process_canny().
canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
)

# Img2img pipeline; called by process_canny_tile(). Despite the `canny_` prefix
# in the variable names, the checkpoint loaded here is the *tile* ControlNet.
canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
)
# The img2img pipeline keeps whatever scheduler its checkpoint config provides;
# only CPU offload and xformers attention are switched on.
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()


# Swap the canny pipeline's scheduler for UniPC (built from the existing
# scheduler config), then switch on CPU offload and xformers attention.
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# SDXL base pipeline with a canny ControlNet and a separately loaded VAE
# checkpoint; called by process_canny_sdxl().
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16
)
vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()

# SDXL refiner. It reuses the base pipeline's second text encoder and VAE
# objects instead of loading its own copies.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()

def resize_image_output(im, width, height):
    """Resize *im* to exactly ``width`` x ``height`` and return it as a PIL image.

    Args:
        im: Anything ``np.array`` can turn into an image array (e.g. a PIL
            image or an existing ndarray).
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A ``PIL.Image.Image`` built from the bicubically resized pixels.
    """
    pixels = np.array(im)
    # cv2.resize takes its target size as (width, height).
    resized = cv2.resize(pixels, (width, height), interpolation=cv2.INTER_CUBIC)
    return Image.fromarray(resized)

def resize_image(im, max_size=590000, min_size=262144):
    """Rescale *im* so its pixel area lies between ``min_size`` and ``max_size``.

    The aspect ratio is preserved (up to rounding). Images whose area exceeds
    ``max_size`` are shrunk to that area; images whose area is at or below
    ``min_size`` are enlarged to that area; anything in between keeps its
    size. In every case both sides are then rounded down to a multiple of 8.

    Args:
        im: Image array shaped ``(height, width)`` or ``(height, width, channels)``.
        max_size: Largest allowed pixel area (height * width).
        min_size: Pixel area that smaller images are scaled up to.

    Returns:
        The bicubically resized image array.

    Raises:
        ValueError: If the image has zero height or width.
    """
    # Only the first two axes matter, so grayscale (2-D) arrays work as well.
    height, width = im.shape[:2]
    area = height * width
    if area == 0:
        raise ValueError("cannot resize an image with zero height or width")

    if area > max_size:
        scale_ratio = math.sqrt(area / max_size)
    elif area <= min_size:
        scale_ratio = math.sqrt(area / min_size)
    else:
        scale_ratio = 1.0

    # Round down to a multiple of 8, but never to 0: a very thin image could
    # otherwise end up with an empty side, which cv2.resize rejects.
    new_height = max(8, (int(height / scale_ratio) // 8) * 8)
    new_width = max(8, (int(width / scale_ratio) // 8) * 8)

    # cv2.resize takes its target size as (width, height).
    return cv2.resize(im, (new_width, new_height), interpolation=cv2.INTER_CUBIC)

def process_canny_tile(
    input_image, control_image, x, y, prompt, a_prompt, n_prompt,
    num_samples, image_resolution, ddim_steps, guess_mode,
    strength_conditioning, scale, seed, eta, low_threshold, high_threshold,
):
    """Run the tile-ControlNet img2img pipeline over ``input_image``.

    Only ``input_image``, ``n_prompt``, ``eta`` and ``seed`` are read; the
    remaining parameters are accepted but unused. The prompt is empty and the
    step count, guidance scale, strength, guess mode and image count are fixed.

    NOTE(review): ``control_image`` is ignored — ``input_image`` serves as both
    the init image and the control image. Confirm this is intentional.

    Returns:
        Whatever ``canny_pipe_img2img`` returns for the call (the raw
        pipeline output, not an unpacked image).
    """
    rng = torch.Generator(device="cpu").manual_seed(seed)
    call_kwargs = dict(
        prompt='',
        image=input_image,
        control_image=input_image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=rng,
    )
    return canny_pipe_img2img(**call_kwargs)

def process_canny(
    input_image, x, y, prompt, a_prompt, n_prompt, num_samples,
    image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta,
    low_threshold, high_threshold,
):
    """Generate images with the canny-ControlNet text-to-image pipeline.

    Args:
        input_image: Control image handed to the pipeline as ``image``.
        x: Forwarded as the output ``height``.
        y: Forwarded as the output ``width``.
        prompt, a_prompt: Joined with a comma to form the positive prompt.
        n_prompt: Negative prompt.
        num_samples: Number of images per prompt.
        ddim_steps: Number of inference steps.
        strength: ControlNet conditioning scale (also printed to stdout).
        scale: Guidance scale.
        seed: Seed for a CPU ``torch.Generator``.
        eta: Forwarded to the pipeline unchanged.
        image_resolution, guess_mode, low_threshold, high_threshold:
            Accepted but not used here.

    Returns:
        Whatever ``canny_pipe`` returns for the call (the raw pipeline output).
    """
    print(strength)

    positive_prompt = ','.join((prompt, a_prompt))
    rng = torch.Generator(device="cpu").manual_seed(seed)

    output = canny_pipe(
        prompt=positive_prompt,
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=rng,
    )
    return output

def process_canny_sdxl(
    input_image, x, y, prompt, a_prompt, n_prompt, num_samples,
    image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta,
    low_threshold, high_threshold,
):
    """Generate with the SDXL canny-ControlNet pipeline, then run the refiner.

    Stage one calls ``pipe_xl`` with ``output_type="latent"`` and keeps its
    ``.images``; stage two feeds that result to ``refiner`` with
    ``denoising_start=0.8``.

    Args:
        input_image: Control image for the base pipeline.
        x: Forwarded as the output ``height``.
        y: Forwarded as the output ``width``.
        prompt, a_prompt: Joined with a comma for the base pass; the refiner
            receives ``prompt`` alone.
        n_prompt, scale, strength, seed, eta: Used by the base pass only.
        num_samples, ddim_steps: Used by both passes.
        image_resolution, guess_mode, low_threshold, high_threshold:
            Accepted but not used here.

    NOTE(review): the refiner call gets no negative prompt and no generator,
    so the second stage is not tied to ``seed`` — confirm this is intended.

    Returns:
        Whatever ``refiner`` returns for the call (the raw pipeline output).
    """
    base_prompt = ','.join((prompt, a_prompt))
    rng = torch.Generator(device="cpu").manual_seed(seed)

    base_output = pipe_xl(
        prompt=base_prompt,
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=rng,
        output_type="latent",
    )
    latents = base_output.images

    refined = refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=latents,
    )
    return refined


def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
    """Gradio callback: turn the input image into a canny edge map and generate from it.

    The image is loaded, rescaled by ``resize_image``, converted to a
    3-channel canny edge map, and passed to ``process_canny`` as the control
    image together with the resized height and width.

    Args:
        image: Input image as delivered by the Gradio image component.
        prompt: Main positive prompt.
        a_prompt: Extra positive prompt text appended to ``prompt``.
        n_prompt: Negative prompt.
        ddim_steps: Number of inference steps.
        strength: ControlNet conditioning scale.
        scale: Guidance scale.
        seed: Generator seed.
        eta: Forwarded to the pipeline unchanged.
        low_threshold: Lower canny threshold.
        high_threshold: Upper canny threshold.

    Returns:
        The first generated image, suitable for a ``gr.Image`` output.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # Surface a readable message in the UI instead of a load_image traceback.
        raise gr.Error("Please provide an input image.")

    pixels = resize_image(np.array(load_image(image)))
    height, width = pixels.shape[:2]

    # cv2.Canny yields a single-channel map; replicate it to three channels
    # for the control image.
    edges = cv2.Canny(pixels, low_threshold, high_threshold)
    control_image = Image.fromarray(np.stack([edges, edges, edges], axis=2))

    # Slider values may arrive as floats; the step count and seed must be ints.
    output = process_canny(
        control_image, height, width, prompt, a_prompt, n_prompt, 1, None,
        int(ddim_steps), False, float(strength), scale, int(seed), eta,
        low_threshold, high_threshold,
    )
    # process_canny returns the pipeline output object; the output component
    # needs an actual image, so unwrap the single generated sample.
    return output.images[0]


# Gradio UI: one input column (image, prompt, advanced sliders) and one output
# column, wired to process(). Requests go through Gradio's queue.
demo = gr.Blocks().queue()

with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox()
            # Button text is the positional `value` argument, not `label`.
            run_button = gr.Button("Run")

            with gr.Accordion("Advanced Options"):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)  # default value was 9.0
                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
                n_prompt = gr.Textbox(label="Negative Prompt",
                                      value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')

        with gr.Column():
            # gr.outputs.* is the deprecated legacy namespace (removed in
            # Gradio 4); the unified gr.Image component works as an output.
            result = gr.Image(label='Output', type="pil")

    # The order here must match the parameter order of process().
    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result])


demo.launch()