from typing import Dict, List, Any import base64 import numpy as np import tensorflow as tf from tensorflow import keras from keras_cv.models.stable_diffusion.decoder import Decoder class EndpointHandler(): def __init__(self, path=""): img_height = 512 img_width = 512 img_height = round(img_height / 128) * 128 img_width = round(img_width / 128) * 128 self.decoder = Decoder(img_height, img_width) decoder_weights_fpath = keras.utils.get_file( origin="https://huggingface.co/fchollet/stable-diffusion/resolve/main/kcv_decoder.h5", file_hash="ad350a65cc8bc4a80c8103367e039a3329b4231c2469a1093869a345f55b1962", ) self.decoder.load_weights(decoder_weights_fpath) def __call__(self, data: Dict[str, Any]) -> str: # get inputs latent = data.pop("inputs", data) batch_size = data.pop("batch_size", 1) latent = base64.b64decode(latent) latent = np.frombuffer(latent, dtype="float32") latent = np.reshape(latent, (batch_size, 64, 64, 4)) decoded = self.decoder.predict_on_batch(latent) decoded = ((decoded + 1) / 2) * 255 images = np.clip(decoded, 0, 255).astype("uint8") images_b64 = base64.b64encode(images.tobytes()) images_b64str = images_b64.decode() return images_b64str