Spaces:

tori29umai
/

FramePack_mask_fadeout_frame1

Running on Zero

App Files Files Community

FramePack_mask_fadeout_frame1 / app.py

tori29umai

Update app.py

62e73b1 verified 3 months ago

raw

history blame contribute delete

73.3 kB

	from diffusers_helper.hf_login import login

	import gc
	import time
	import os
	import subprocess
	import glob
	import tempfile # 1フレーム推論のための設定
	import shutil # ディレクトリ削除用
	import cv2 # 画像処理用
	import numpy as np
	from PIL import Image

	# Detect whether we are running inside a Hugging Face Space: the app treats
	# the presence of the SPACE_ID environment variable as "inside a Space".
	IN_HF_SPACE = os.environ.get('SPACE_ID') is not None

	# Point HF_HOME at ./hf_download next to this file. This runs before the
	# gradio/torch imports below - presumably so that Hugging Face libraries see
	# the value when they are imported (TODO confirm).
	os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))

	import gradio as gr
	import torch
	import traceback
	import einops
	import safetensors.torch as sf
	import numpy as np

	# Module-level flags tracking GPU availability. They all start out False and
	# are updated by the environment probing code and the helpers further down.
	GPU_AVAILABLE = False
	GPU_INITIALIZED = False
	last_update_time = time.time()
	cpu_fallback_mode = False # flag: True while the app runs in CPU fallback mode

	# Tracks whether the models have been initialised
	MODELS_INITIALIZED = False

	# クライアントタイムアウト設定の強化
	if IN_HF_SPACE:
	    # Extend both the server-side and client-side timeouts.
	    os.environ["GRADIO_SERVER_PORT"] = "7860"
	    os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
	    os.environ["GRADIO_UPLOAD_TIMEOUT"] = "600" # 10 minute upload timeout
	    os.environ["GRADIO_REQUEST_TIMEOUT"] = "900" # 15 minute request timeout
	    # Adjust the address-space limit. This is best-effort: setrlimit raises
	    # ValueError when the requested hard limit is above the current one (an
	    # unprivileged process cannot raise it), and the resource module does not
	    # exist on every platform. Neither case should abort application start-up.
	    try:
	        import resource
	        resource.setrlimit(resource.RLIMIT_AS, (1<<40, 1<<40))
	    except (ImportError, ValueError, OSError) as e:
	        print(f"メモリ制限の設定に失敗しました（無視して続行します）: {e}")

	# Hugging Face Space内の場合、spacesモジュールをインポート
	if IN_HF_SPACE:
	    try:
	        import spaces
	        print("Hugging Face Space環境内で実行中、spacesモジュールをインポートしました")

	        # Check whether a GPU is available.
	        try:
	            GPU_AVAILABLE = torch.cuda.is_available()
	            print(f"GPU利用可能: {GPU_AVAILABLE}")
	            if GPU_AVAILABLE:
	                print(f"GPUデバイス名: {torch.cuda.get_device_name(0)}")
	                print(f"GPUメモリ: {torch.cuda.get_device_properties(0).total_memory / 1e9} GB")

	                # Run a tiny GPU operation to confirm the device is really usable.
	                try:
	                    test_tensor = torch.zeros(1, device='cuda')
	                    test_tensor = test_tensor + 1
	                    del test_tensor
	                    print("GPUテスト操作に成功しました")
	                except Exception as e:
	                    print(f"GPUテスト操作でエラーが発生しました: {e}")
	                    GPU_AVAILABLE = False
	                    cpu_fallback_mode = True
	                    print("CPUフォールバックモードに設定します")
	            else:
	                print("警告: CUDAが利用可能と報告されていますが、GPUデバイスが検出されませんでした")
	                cpu_fallback_mode = True
	        except Exception as e:
	            GPU_AVAILABLE = False
	            cpu_fallback_mode = True
	            print(f"GPU確認中にエラーが発生しました: {e}")
	            print("CPUモードで実行します")
	    except ImportError:
	        print("spacesモジュールのインポートに失敗しました。Hugging Face Space環境外かもしれません")
	        GPU_AVAILABLE = torch.cuda.is_available()
	        if not GPU_AVAILABLE:
	            cpu_fallback_mode = True
	else:
	    # Outside a Space nothing above runs, so GPU_AVAILABLE would keep its
	    # initial False value and check_gpu_status() would force CPU mode even on a
	    # machine with a working CUDA device. Probe CUDA here as well.
	    try:
	        GPU_AVAILABLE = torch.cuda.is_available()
	    except Exception as e:
	        GPU_AVAILABLE = False
	        print(f"GPU確認中にエラーが発生しました: {e}")
	    if not GPU_AVAILABLE:
	        cpu_fallback_mode = True

	# 初回ロード時のチェック関数
	def is_first_time_load():
	    """Return True on the first call only.

	    The module-level ``GPU_INITIALIZED`` flag is set on that first call, so
	    every later call returns False.
	    """
	    global GPU_INITIALIZED
	    if GPU_INITIALIZED:
	        return False
	    GPU_INITIALIZED = True
	    return True


	# GPU制限を超えたかどうかを確認する関数
	def check_gpu_quota_exceeded():
	    """Report whether the GPU should be treated as unavailable due to quota.

	    Returns True immediately when the app is already in CPU fallback mode or
	    no GPU is available, and False immediately outside a Space. Otherwise the
	    local usage endpoint is queried; a response whose ``gpu.quota_exceeded``
	    field is truthy switches the app to CPU fallback mode and returns True.
	    Any request, status or parsing problem is logged and reported as False.
	    """
	    global cpu_fallback_mode

	    # Already on the CPU path: nothing to query.
	    if cpu_fallback_mode or not GPU_AVAILABLE:
	        return True

	    if not IN_HF_SPACE:
	        return False

	    try:
	        import requests
	        try:
	            reply = requests.get("http://localhost:7860/api/v1/spaces/usage", timeout=1)
	            if reply.status_code != 200:
	                print(f"APIエンドポイントから不正なステータスコード: {reply.status_code}")
	            else:
	                try:
	                    usage = reply.json()
	                    gpu_usage = usage.get("gpu", {})
	                    if gpu_usage.get("quota_exceeded", False):
	                        print("GPU使用制限に達しています。")
	                        cpu_fallback_mode = True
	                        return True
	                except ValueError as json_err:
	                    # An undecodable body is not treated as "quota exceeded".
	                    print(f"JSON解析エラー: {json_err}")
	                    return False
	        except requests.exceptions.RequestException as req_err:
	            print(f"APIリクエスト中にエラー: {req_err}")
	    except Exception as e:
	        print(f"GPU使用制限確認中にエラー: {e}")

	    return False

	# 条件付きインポート（CPUモードでのエラーを回避するため）
	try:
	    from diffusers import AutoencoderKLHunyuanVideo
	    from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer
	    from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
	    from diffusers_helper.utils import (
	        save_bcthw_as_mp4,
	        crop_or_pad_yield_mask,
	        soft_append_bcthw,
	        resize_and_center_crop,
	        state_dict_weighted_merge,
	        state_dict_offset_merge,
	        generate_timestamp,
	    )
	    from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
	    from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
	    from diffusers_helper.memory import (
	        cpu,
	        gpu,
	        get_cuda_free_memory_gb,
	        move_model_to_device_with_memory_preservation,
	        offload_model_from_device_for_memory_preservation,
	        fake_diffusers_current_device,
	        DynamicSwapInstaller,
	        unload_complete_models,
	        load_model_as_complete,
	        IN_HF_SPACE as MEMORY_IN_HF_SPACE
	    )
	    from diffusers_helper.thread_utils import AsyncStream, async_run
	    from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
	    from transformers import SiglipImageProcessor, SiglipVisionModel
	    from diffusers_helper.clip_vision import hf_clip_vision_encode
	    from diffusers_helper.bucket_tools import find_nearest_bucket
	    print("基本的なディフューザーモジュールを正常にインポートしました")
	except ImportError as e:
	    print(f"一部の基本モジュールのインポートに失敗しました: {e}")

	    # Minimal stand-ins so the module can still be imported and the UI built.
	    # Only the names defined below are replaced; every other name imported in
	    # the try block stays undefined on this path.
	    class MockQueue:
	        """List-backed stand-in for the queues held by AsyncStream."""

	        def __init__(self):
	            self.items = []

	        def push(self, item):
	            """Append an item to the queue."""
	            self.items.append(item)

	        def top(self):
	            """Return the most recently pushed item, or None when empty."""
	            return self.items[-1] if self.items else None

	        def next(self):
	            """Pop the oldest item; an empty queue yields ("end", None)."""
	            return self.items.pop(0) if self.items else ("end", None)

	    class AsyncStream:
	        """Stand-in stream exposing an input queue and an output queue."""

	        def __init__(self):
	            self.input_queue = MockQueue()
	            self.output_queue = MockQueue()

	    def async_run(*args, **kwargs):
	        """No-op replacement for async_run.

	        Accepts any positional and keyword arguments. The previous signature
	        ``(args, *kwargs)`` raised TypeError as soon as a keyword argument was
	        passed.
	        """
	        pass

	    def make_progress_bar_css():
	        """Return an empty stylesheet."""
	        return ""

	    def make_progress_bar_html(percentage, hint):
	        """Return a plain-text progress line wrapped in a div."""
	        return f"<div>{percentage}% - {hint}</div>"

	# GPU使用に必要なモジュールのインポートを試みる（可能な場合）
	try:
	    from utils.lora_utils import merge_lora_to_state_dict
	    from utils.fp8_optimization_utils import optimize_state_dict_with_fp8, apply_fp8_monkey_patch
	    print("LoRAとFP8最適化モジュールを正常にインポートしました")
	except ImportError as e:
	    print(f"一部のモジュールのインポートに失敗しました: {e}")

	    # Pass-through stand-ins: each one warns and leaves its input untouched.
	    def merge_lora_to_state_dict(state_dict, lora_file, lora_multiplier, device=None):
	        """Warn that LoRA merging is unavailable and return state_dict unchanged."""
	        print("Warning: LoRA適用機能が利用できません")
	        return state_dict

	    def optimize_state_dict_with_fp8(state_dict, device, target_keys, exclude_keys, move_to_device=False):
	        """Warn that FP8 optimisation is unavailable and return state_dict unchanged."""
	        print("Warning: FP8最適化機能が利用できません")
	        return state_dict

	    def apply_fp8_monkey_patch(model, state_dict, use_scaled_mm=False):
	        """Warn that the FP8 monkey patch is unavailable; the model is not modified."""
	        print("Warning: FP8 monkey patch機能が利用できません")
	        return None

	# Output directory, created at import time if it does not exist yet.
	outputs_folder = './outputs/'
	os.makedirs(outputs_folder, exist_ok=True)

	# Added: list of supported resolutions. find_nearest_resolution unpacks each
	# entry as (height, width).
	NEW_RESOLUTIONS = [
	(416, 960), (448, 864), (480, 832), (512, 768), (544, 704),
	(576, 672), (608, 640), (640, 608), (672, 576), (704, 544),
	(768, 512), (832, 480), (864, 448), (960, 416), (640, 640),
	]

	# VRAMを安全に確認する関数
	def get_safe_vram_size():
	    """Return the free VRAM in GB, or 6.0 when it cannot be queried.

	    The 6.0 GB default is used when CUDA is unavailable, when the app is in
	    CPU fallback mode, and when the query itself raises.
	    """
	    default_gb = 6.0
	    try:
	        if not torch.cuda.is_available() or cpu_fallback_mode:
	            print("CUDAが利用できないか、CPUフォールバックモードです。デフォルトのメモリ設定を使用します")
	            return default_gb
	        available_gb = get_cuda_free_memory_gb(gpu)
	        print(f'空きVRAM {available_gb} GB')
	        return available_gb
	    except Exception as e:
	        print(f"CUDAメモリ取得中にエラーが発生しました: {e}、デフォルトのメモリ設定を使用します")
	        return default_gb

	# Initialise the memory settings (free_mem_gb / high_vram).
	# NOTE(review): indentation was lost in this copy of the file; the nesting of
	# the statements below, in particular whether the final summary print belongs
	# to the else branch or runs for both branches, must be confirmed against the
	# original file.
	if not IN_HF_SPACE:
	# Memory settings outside of Spaces: measure free VRAM.
	free_mem_gb = get_safe_vram_size()
	high_vram = free_mem_gb > 60
	print(f'高VRAM モード: {high_vram}')
	else:
	# Memory settings inside Spaces: derived from the device's total memory.
	print("Spaces環境でデフォルトのメモリ設定を使用します")
	try:
	if GPU_AVAILABLE and not cpu_fallback_mode:
	free_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9 * 0.9 # use 90% of the GPU memory
	high_vram = free_mem_gb > 10 # more conservative condition
	else:
	free_mem_gb = 6.0 # default value
	high_vram = False
	except Exception as e:
	print(f"GPUメモリ取得中にエラーが発生しました: {e}")
	free_mem_gb = 6.0 # default value
	high_vram = False

	print(f'GPUメモリ: {free_mem_gb:.2f} GB, 高VRAMモード: {high_vram}')

	# The models global holds the loaded model references; stream starts unset.
	models = {}
	stream = None

	# 市松模様を作成する関数
	def create_checkerboard(width, height, cell_size):
	    """Build a purple/yellow checkerboard image.

	    Args:
	        width: Output width in pixels.
	        height: Output height in pixels.
	        cell_size: Edge length of one square cell in pixels.

	    Returns:
	        A uint8 array of shape (height, width, 3). The top-left cell is
	        purple and colours alternate between neighbouring cells.
	    """
	    # Number of cells needed to cover the requested area.
	    n_rows = int(np.ceil(height / cell_size))
	    n_cols = int(np.ceil(width / cell_size))

	    # Index 0 = purple, index 1 = yellow (RGB).
	    palette = np.array([(128, 0, 128), (255, 255, 0)], dtype=np.uint8)

	    # One entry per cell: 0 where row + column is even, 1 where it is odd.
	    cell_parity = (np.arange(n_rows)[:, None] + np.arange(n_cols)[None, :]) % 2
	    cells = palette[cell_parity]

	    # Blow every cell up to cell_size x cell_size pixels, then crop the
	    # overhang so the result matches the requested size exactly.
	    board = cells.repeat(cell_size, axis=0).repeat(cell_size, axis=1)
	    return board[:height, :width]

	# ImageMaskからの画像とマスクを処理する関数
	def process_image_mask(image_mask_dict):
	    """Turn a Gradio ImageMask value into one RGB array with the mask painted in.

	    Args:
	        image_mask_dict: Dict with a "background" image and an optional
	            "layers" list; only the first layer is used as the mask. When the
	            background is a PIL image larger than 1024 px on its longer side,
	            the resized background and layers are written back into this dict.

	    Returns:
	        None when the argument is not a dict or has no background. Otherwise
	        a 3-channel array: the background with every masked pixel replaced by
	        a purple/yellow checkerboard, or the plain background when there is
	        no layer.
	    """
	    if image_mask_dict is None or not isinstance(image_mask_dict, dict):
	        return None

	    # New-style Gradio ImageMask payload.
	    background = image_mask_dict.get("background")
	    layers = image_mask_dict.get("layers")

	    if background is None:
	        return None

	    # Limit the image size: downscale so the longer side becomes 1024 px.
	    if isinstance(background, Image.Image):
	        w, h = background.size
	        max_size = 1024
	        if w > max_size or h > max_size:
	            ratio = max_size / max(w, h)
	            # Clamp to 1 px: for extreme aspect ratios int() would yield 0,
	            # which is not a valid resize target.
	            new_w, new_h = max(1, int(w * ratio)), max(1, int(h * ratio))
	            background = background.resize((new_w, new_h), Image.LANCZOS)
	            print(f"画像サイズを制限しました: {w}x{h} → {new_w}x{new_h}")

	            # Resize the layers the same way so the mask stays aligned.
	            if layers and len(layers) > 0:
	                new_layers = []
	                for layer in layers:
	                    if isinstance(layer, Image.Image):
	                        layer = layer.resize((new_w, new_h), Image.LANCZOS)
	                    new_layers.append(layer)
	                layers = new_layers
	                image_mask_dict["layers"] = layers

	            image_mask_dict["background"] = background

	    # ---- 1) Normalise the background to 3-channel RGB ----
	    # Any non-RGB PIL mode (RGBA, L, P, LA, CMYK, ...) is converted; without
	    # this, grayscale/palette inputs yield arrays the blend below and the
	    # caller's "H, W, C = shape" unpacking cannot handle.
	    if isinstance(background, Image.Image) and background.mode != "RGB":
	        background = background.convert("RGB")
	    img_array = np.array(background)
	    if img_array.ndim == 2:
	        # Grayscale array input: replicate into three channels.
	        img_array = np.stack([img_array] * 3, axis=-1)
	    elif img_array.ndim == 3 and img_array.shape[2] == 1:
	        img_array = np.repeat(img_array, 3, axis=2)
	    elif img_array.ndim == 3 and img_array.shape[2] == 4:
	        # Safety net for 4-channel arrays: drop the alpha channel.
	        img_array = img_array[..., :3]

	    # ---- 2) Without a layer there is nothing to paint ----
	    if not (layers and len(layers) > 0):
	        return img_array

	    layer = layers[0]
	    # Keep plain grayscale layers as they are; bring every other PIL mode to
	    # RGB so the grayscale conversion below receives a 3-channel array.
	    if isinstance(layer, Image.Image) and layer.mode not in ("RGB", "L"):
	        layer = layer.convert("RGB")
	    mask_array = np.array(layer)
	    if mask_array.ndim == 3 and mask_array.shape[2] == 4:
	        mask_array = mask_array[..., :3]

	    # Convert to grayscale, then binarise: values above 1 count as masked.
	    if mask_array.ndim == 3:
	        mask_gray = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
	    else:
	        mask_gray = mask_array
	    _, binary_mask = cv2.threshold(mask_gray, 1, 255, cv2.THRESH_BINARY)

	    # Checkerboard cells scale with the image: sqrt(pixel count) / 20, at
	    # least 10 px.
	    total_pixels = img_array.shape[0] * img_array.shape[1]
	    cell_size = max(int(np.sqrt(total_pixels) / 20), 10)
	    checkerboard = create_checkerboard(img_array.shape[1], img_array.shape[0], cell_size)

	    # Masked pixels take the checkerboard colour, all others keep the image.
	    masked = (binary_mask // 255).astype(bool)
	    result = np.where(masked[..., None], checkerboard, img_array)

	    return result.astype(np.uint8)

	# 最も近い解像度を見つける関数
	def find_nearest_resolution(width, height):
	    """Pick the entry of NEW_RESOLUTIONS that best fits a width x height input.

	    Near-square inputs (aspect ratio between 0.95 and 1.05) get the first
	    square resolution in the list. Otherwise each candidate is scored by its
	    aspect-ratio difference (weight 10000) plus its pixel-count difference
	    (weight 0.01) and the lowest score wins; ties go to the earlier entry.

	    Returns:
	        A (height, width) tuple, or None when there is no candidate.
	    """
	    aspect_ratio = width / height

	    # Square-ish input: prefer a square resolution when the list has one.
	    if 0.95 <= aspect_ratio <= 1.05:
	        for cand_h, cand_w in NEW_RESOLUTIONS:
	            if cand_w == cand_h:
	                return (cand_h, cand_w)

	    source_pixels = width * height

	    def mismatch(candidate):
	        cand_h, cand_w = candidate
	        cand_aspect = cand_w / cand_h
	        aspect_diff = abs(cand_aspect - aspect_ratio)
	        pixel_diff = abs(cand_w * cand_h - source_pixels)
	        # Aspect ratio dominates; pixel count mostly breaks ties.
	        return aspect_diff * 10000 + pixel_diff * 0.01

	    best = min(NEW_RESOLUTIONS, key=mismatch, default=None)
	    if best is None:
	        return None
	    return (best[0], best[1])

	# 一時ディレクトリ管理関数
	def create_temp_directory():
	    """Create a fresh temporary directory (prefix "hunyuan_temp_") and return its path."""
	    new_dir = tempfile.mkdtemp(prefix="hunyuan_temp_")
	    print(f"一時ディレクトリを作成しました: {new_dir}")
	    return new_dir

	def cleanup_temp_files(temp_dir):
	    """Delete the temporary directory tree after processing.

	    Does nothing when temp_dir is empty/None or does not exist. Deletion
	    errors are logged rather than raised.
	    """
	    if not temp_dir or not os.path.exists(temp_dir):
	        return
	    try:
	        shutil.rmtree(temp_dir)
	        print(f"一時ディレクトリを削除しました: {temp_dir}")
	    except Exception as e:
	        print(f"一時ディレクトリの削除に失敗しました: {e}")

	# mp4からffmpegでPNGフレームを抽出する関数（一時フォルダ使用）
	def extract_frames_from_mp4(mp4_path, temp_dir, job_id):
	    """Extract PNG frames from an MP4 with ffmpeg into the temporary directory.

	    Frames are written to ``<temp_dir>/frames/<job_id>/frame_NNNN.png`` at
	    30 fps.

	    Args:
	        mp4_path: Path of the video to read.
	        temp_dir: Base temporary directory.
	        job_id: Sub-directory name for this job's frames.

	    Returns:
	        Sorted list of frame paths. An empty list when ffmpeg fails or
	        cannot be started (for example when it is not installed).
	    """
	    # Per-job output folder for the frames.
	    frames_dir = os.path.join(temp_dir, "frames", job_id)
	    os.makedirs(frames_dir, exist_ok=True)

	    # Global options come first, then the input, then output options and the
	    # output pattern, following the order of the ffmpeg synopsis.
	    cmd = [
	        'ffmpeg',
	        '-hide_banner',
	        '-loglevel', 'error',
	        '-i', mp4_path,
	        '-vf', 'fps=30',
	        f'{frames_dir}/frame_%04d.png',
	    ]

	    try:
	        subprocess.run(cmd, check=True)
	    except (subprocess.CalledProcessError, OSError) as e:
	        # OSError covers a missing or non-executable ffmpeg binary, which
	        # previously escaped as an unhandled FileNotFoundError.
	        print(f"Error extracting frames from {mp4_path}: {e}")
	        return []

	    # Escape the directory part so glob metacharacters in the path are
	    # matched literally.
	    return sorted(glob.glob(f'{glob.escape(frames_dir)}/frame_*.png'))

	# 一時ファイルを使用するmp4保存関数
	def save_bcthw_as_mp4_with_frames(bcthw, temp_dir, job_id, fps=30, crf=16):
	    """Save a BCTHW tensor as ``<temp_dir>/<job_id>.mp4`` and extract its frames.

	    Returns:
	        A ``(mp4_path, frames)`` tuple, where frames is the list returned by
	        extract_frames_from_mp4 for the saved video.
	    """
	    video_path = os.path.join(temp_dir, f"{job_id}.mp4")

	    # Write the video with the original helper, then split it into PNG frames.
	    save_bcthw_as_mp4(bcthw, video_path, fps, crf)
	    frame_paths = extract_frames_from_mp4(video_path, temp_dir, job_id)

	    return video_path, frame_paths

	# GPUの状態を確認する関数
	def check_gpu_status():
	    """Return True when a GPU is available and its quota is not exceeded.

	    In every other case the app is switched to CPU fallback mode and False
	    is returned.
	    """
	    global GPU_AVAILABLE, cpu_fallback_mode

	    gpu_usable = False
	    if GPU_AVAILABLE:
	        if check_gpu_quota_exceeded():
	            print("GPU使用制限を超えました。CPUモードに切り替えます。")
	        else:
	            gpu_usable = True

	    if not gpu_usable:
	        cpu_fallback_mode = True
	    return gpu_usable

	# モデルロード関数（GPU対応）
	def load_models():
	"""Load all models (GPU-capable path) and cache them in the global ``models`` dict."""
	# NOTE(review): indentation was lost in this copy of the file, so the nesting
	# of the statements below is not visible here; confirm it against the
	# original file before changing any control flow.
	global models, MODELS_INITIALIZED, cpu_fallback_mode

	# Already initialised: return the cached models as-is.
	if MODELS_INITIALIZED and models:
	return models

	print("モデルを読み込みます...")

	try:
	# Check the GPU status; float16 on the GPU, float32 otherwise.
	gpu_ok = check_gpu_status()
	device = gpu if gpu_ok and not cpu_fallback_mode else cpu
	dtype = torch.float16 if gpu_ok and not cpu_fallback_mode else torch.float32

	print(f"モデルをロード: デバイス={device}, データ型={dtype}")

	# Load the models. Each one is placed on the CPU first.
	text_encoder = LlamaModel.from_pretrained(
	"hunyuanvideo-community/HunyuanVideo", subfolder="text_encoder", torch_dtype=dtype
	).cpu()
	text_encoder_2 = CLIPTextModel.from_pretrained(
	"hunyuanvideo-community/HunyuanVideo", subfolder="text_encoder_2", torch_dtype=dtype
	).cpu()
	tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer")
	tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2")
	vae = AutoencoderKLHunyuanVideo.from_pretrained(
	"hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=dtype
	).cpu()

	feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="feature_extractor")
	image_encoder = SiglipVisionModel.from_pretrained(
	"lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=dtype
	).cpu()

	print("Transformerモデルを読み込み中...")

	# CPU support: use float32 instead of bfloat16 in CPU mode.
	transformer_dtype = torch.bfloat16 if gpu_ok and not cpu_fallback_mode else torch.float32

	transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
	"lllyasviel/FramePackI2V_HY", torch_dtype=transformer_dtype
	).cpu()

	transformer.eval()
	transformer.high_quality_fp32_output_for_inference = True
	print("transformer.high_quality_fp32_output_for_inference = True")

	# NOTE(review): with the indentation lost it is unclear whether
	# requires_grad_(False) below is inside this if or unconditional - confirm.
	if gpu_ok and not cpu_fallback_mode:
	transformer.to(dtype=torch.bfloat16)
	transformer.requires_grad_(False)

	vae.eval()
	text_encoder.eval()
	text_encoder_2.eval()
	image_encoder.eval()

	# Enable VAE slicing/tiling on low-VRAM setups and in CPU fallback mode.
	if not high_vram or cpu_fallback_mode:
	vae.enable_slicing()
	vae.enable_tiling()

	# Do not lower precision in CPU mode.
	if gpu_ok and not cpu_fallback_mode:
	vae.to(dtype=torch.float16)
	image_encoder.to(dtype=torch.float16)
	text_encoder.to(dtype=torch.float16)
	text_encoder_2.to(dtype=torch.float16)

	vae.requires_grad_(False)
	text_encoder.requires_grad_(False)
	text_encoder_2.requires_grad_(False)
	image_encoder.requires_grad_(False)

	# Move to the GPU (only when possible).
	if gpu_ok and not cpu_fallback_mode:
	if not high_vram:
	# Original author's note: DynamicSwapInstaller is the same as huggingface's enable_sequential_offload but 3x faster.
	DynamicSwapInstaller.install_model(text_encoder, device=gpu)
	else:
	text_encoder.to(gpu)
	text_encoder_2.to(gpu)
	image_encoder.to(gpu)
	vae.to(gpu)

	print("すべてのモデルの読み込みが完了しました")

	# Publish the loaded objects through the module-level models dict.
	models = {
	'transformer': transformer,
	'text_encoder': text_encoder,
	'text_encoder_2': text_encoder_2,
	'tokenizer': tokenizer,
	'tokenizer_2': tokenizer_2,
	'vae': vae,
	'feature_extractor': feature_extractor,
	'image_encoder': image_encoder,
	}

	MODELS_INITIALIZED = True
	return models
	except Exception as e:
	# Detect GPU-related errors by looking for keywords in the message.
	if "CUDA" in str(e) or "GPU" in str(e) or "nvidia" in str(e).lower():
	print(f"GPU関連のエラーが発生しました: {e}")
	print("CPUモードにフォールバックします")
	cpu_fallback_mode = True
	# Retry in CPU mode.
	return load_models_cpu()
	else:
	# Any other error: log it with a traceback and return an empty dict.
	print(f"モデル読み込み中にエラーが発生しました: {e}")
	traceback.print_exc()
	return {}

	# CPUのみを使用したモデルロード関数
	def load_models_cpu():
	    """Load every model on the CPU in float32 and cache them in ``models``.

	    Returns:
	        The populated models dict, or an empty dict when loading fails (the
	        error and its traceback are printed).
	    """
	    global models, MODELS_INITIALIZED

	    print("CPUモードでモデルを読み込みます...")

	    try:
	        # CPU-mode settings. "device" is not used further down; every model
	        # below is placed on the CPU explicitly with .cpu().
	        device = cpu
	        dtype = torch.float32

	        hunyuan_repo = "hunyuanvideo-community/HunyuanVideo"
	        redux_repo = "lllyasviel/flux_redux_bfl"

	        # Text encoders, tokenizers and VAE.
	        text_encoder = LlamaModel.from_pretrained(hunyuan_repo, subfolder="text_encoder", torch_dtype=dtype).cpu()
	        text_encoder_2 = CLIPTextModel.from_pretrained(hunyuan_repo, subfolder="text_encoder_2", torch_dtype=dtype).cpu()
	        tokenizer = LlamaTokenizerFast.from_pretrained(hunyuan_repo, subfolder="tokenizer")
	        tokenizer_2 = CLIPTokenizer.from_pretrained(hunyuan_repo, subfolder="tokenizer_2")
	        vae = AutoencoderKLHunyuanVideo.from_pretrained(hunyuan_repo, subfolder="vae", torch_dtype=dtype).cpu()

	        # Image feature extractor and vision encoder.
	        feature_extractor = SiglipImageProcessor.from_pretrained(redux_repo, subfolder="feature_extractor")
	        image_encoder = SiglipVisionModel.from_pretrained(redux_repo, subfolder="image_encoder", torch_dtype=dtype).cpu()

	        print("CPUモードでTransformerモデルを読み込み中...")
	        transformer = HunyuanVideoTransformer3DModelPacked.from_pretrained(
	            "lllyasviel/FramePackI2V_HY", torch_dtype=torch.float32
	        ).cpu()

	        transformer.eval()
	        transformer.high_quality_fp32_output_for_inference = True
	        transformer.requires_grad_(False)

	        auxiliary_models = (vae, text_encoder, text_encoder_2, image_encoder)
	        for module in auxiliary_models:
	            module.eval()

	        # Slicing and tiling are always enabled in CPU mode.
	        vae.enable_slicing()
	        vae.enable_tiling()

	        for module in auxiliary_models:
	            module.requires_grad_(False)

	        print("CPUモードですべてのモデルの読み込みが完了しました")

	        models = dict(
	            transformer=transformer,
	            text_encoder=text_encoder,
	            text_encoder_2=text_encoder_2,
	            tokenizer=tokenizer,
	            tokenizer_2=tokenizer_2,
	            vae=vae,
	            feature_extractor=feature_extractor,
	            image_encoder=image_encoder,
	        )

	        MODELS_INITIALIZED = True
	        return models
	    except Exception as e:
	        print(f"CPUモードでのモデル読み込み中にエラーが発生しました: {e}")
	        traceback.print_exc()
	        return {}

	# モデル取得・初期化関数
	def get_models():
	    """Return the cached models, loading them first when necessary.

	    Loading tries the GPU-capable loader when check_gpu_status() allows it
	    and falls back to the CPU loader on GPU-related errors. Other errors are
	    logged and an empty dict is returned instead of raising.
	    """
	    global models, GPU_INITIALIZED, MODELS_INITIALIZED, cpu_fallback_mode

	    # Fast path: everything is already loaded.
	    if models and MODELS_INITIALIZED:
	        return models

	    try:
	        # Re-check GPU usability before choosing a loader.
	        if not check_gpu_status():
	            print("CPUモードでモデル読み込みを実行します")
	            cpu_fallback_mode = True
	            models = load_models_cpu()
	            return models

	        try:
	            print("GPU対応モードでモデル読み込みを試みます")
	            models = load_models()
	            GPU_INITIALIZED = True
	        except Exception as e:
	            message = str(e)
	            gpu_related = (
	                "CUDA" in message
	                or "GPU" in message
	                or "ZeroGPU quota exceeded" in message
	                or "nvidia" in message.lower()
	            )
	            if not gpu_related:
	                print(f"モデル読み込み中に予期せぬエラーが発生しました: {e}")
	                traceback.print_exc()
	                # Do not re-raise; hand back an empty model dict.
	                return {}
	            print(f"GPU対応モデル読み込みに失敗しました: {e}")
	            print("CPUモードにフォールバックします")
	            cpu_fallback_mode = True
	            models = load_models_cpu()
	    except Exception as e:
	        print(f"モデル取得中にエラーが発生しました: {e}")
	        traceback.print_exc()
	        # Do not re-raise; hand back an empty model dict.
	        return {}

	    return models

	# 処理ワーカー関数（GPU対応版）
	def worker_with_temp_files(
	image_mask_dict,
	prompt,
	n_prompt,
	seed,
	steps,
	cfg,
	gs,
	rs,
	gpu_memory_preservation,
	use_teacache,
	mp4_crf,
	lora_file,
	lora_multiplier,
	fp8_optimization,
	):
	global last_update_time, cpu_fallback_mode
	last_update_time = time.time()

	# GPU状態を最初にチェック
	check_gpu_status()

	# マスク画像の処理
	input_image = process_image_mask(image_mask_dict)
	if input_image is None:
	error_msg = "マスク画像の処理に失敗しました。画像をアップロードして、マスクを描画してください。"
	print(error_msg)
	stream.output_queue.push(("error", error_msg))
	stream.output_queue.push(("end", None))
	return

	# 一時ディレクトリの作成
	temp_dir = create_temp_directory()

	# 一時ディレクトリ内にサブディレクトリを作成
	os.makedirs(os.path.join(temp_dir, "frames"), exist_ok=True)

	# 1フレーム推論用の固定設定
	total_second_length = 1.0
	latent_window_size = 9
	total_latent_sections = 1
	latent_paddings = [0]

	job_id = generate_timestamp()

	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "開始中 ..."))))

	try:
	# モデルを取得
	models = get_models()
	if not models:
	error_msg = "モデルの読み込みに失敗しました。詳細はログを確認してください。"
	print(error_msg)
	stream.output_queue.push(("error", error_msg))
	stream.output_queue.push(("end", None))
	cleanup_temp_files(temp_dir)
	return

	transformer = models['transformer']
	text_encoder = models['text_encoder']
	text_encoder_2 = models['text_encoder_2']
	tokenizer = models['tokenizer']
	tokenizer_2 = models['tokenizer_2']
	vae = models['vae']
	feature_extractor = models['feature_extractor']
	image_encoder = models['image_encoder']

	# LoRAファイルの適用
	if not cpu_fallback_mode and lora_file is not None and os.path.exists(lora_file):
	try:
	print(f"LoRAファイル {os.path.basename(lora_file)} をマージします...")
	state_dict = transformer.state_dict()
	state_dict = merge_lora_to_state_dict(state_dict, lora_file, lora_multiplier, device=gpu)

	if fp8_optimization and not cpu_fallback_mode:
	TARGET_KEYS = ["transformer_blocks", "single_transformer_blocks"]
	EXCLUDE_KEYS = ["norm"] # Exclude norm layers from FP8

	print("FP8最適化を適用します")
	state_dict = optimize_state_dict_with_fp8(state_dict, gpu, TARGET_KEYS, EXCLUDE_KEYS, move_to_device=False)
	apply_fp8_monkey_patch(transformer, state_dict, use_scaled_mm=False)
	gc.collect()

	info = transformer.load_state_dict(state_dict, strict=True, assign=True)
	print(f"LoRAと/またはFP8最適化を適用しました: {info}")
	except Exception as e:
	print(f"LoRA適用中にエラーが発生しました: {e}")
	# エラー発生時も処理を継続
	elif cpu_fallback_mode and lora_file is not None and os.path.exists(lora_file):
	print("CPUモードではLoRAはサポートされていません")

	# Clean GPU (GPU使用時のみ)
	if not high_vram and not cpu_fallback_mode:
	unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)

	# Text encoding
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "テキストエンコーディング中 ..."))))

	# 途中でGPU制限を超えた場合のチェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("テキストエンコード中にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	transformer = models['transformer']
	text_encoder = models['text_encoder']
	text_encoder_2 = models['text_encoder_2']
	tokenizer = models['tokenizer']
	tokenizer_2 = models['tokenizer_2']
	vae = models['vae']
	feature_extractor = models['feature_extractor']
	image_encoder = models['image_encoder']

	# GPU/CPU選択ロジック
	if not cpu_fallback_mode:
	target_device = gpu

	if not high_vram:
	# since we only encode one text - that is one model move and one encode, offload is same time consumption since it is also one load and one encode.
	fake_diffusers_current_device(text_encoder, target_device)
	load_model_as_complete(text_encoder_2, target_device=target_device)

	# テキストエンコーディングを実行
	llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)

	if cfg == 1:
	llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
	else:
	llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)
	else:
	# CPUモードでのテキストエンコーディング
	target_device = cpu

	# テキストエンコーダーをCPUに設定
	text_encoder = text_encoder.to(cpu)
	text_encoder_2 = text_encoder_2.to(cpu)

	# テキストエンコーディングを実行（CPU上で）
	llama_vec, clip_l_pooler = encode_prompt_conds(prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)

	if cfg == 1:
	llama_vec_n, clip_l_pooler_n = torch.zeros_like(llama_vec), torch.zeros_like(clip_l_pooler)
	else:
	llama_vec_n, clip_l_pooler_n = encode_prompt_conds(n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2)

	llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
	llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)

	# Processing input image
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "画像処理中 ..."))))

	H, W, C = input_image.shape

	# 元の画像サイズを保存
	original_height, original_width = H, W

	# 最適な解像度を選択
	target_height, target_width = find_nearest_resolution(W, H)
	print(f"オリジナルサイズ: {W}x{H}, 選択された解像度: {target_width}x{target_height}")

	input_image_np = resize_and_center_crop(input_image, target_width=target_width, target_height=target_height)

	# 一時ディレクトリに入力画像を保存
	input_image_path = os.path.join(temp_dir, f"{job_id}_input.png")
	Image.fromarray(input_image_np).save(input_image_path)

	input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
	input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]

	# VAE encoding
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "VAEエンコーディング中 ..."))))

	# 途中でGPU制限を超えた場合のチェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("VAEエンコード中にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	vae = models['vae']

	if not cpu_fallback_mode and not high_vram:
	load_model_as_complete(vae, target_device=target_device)

	# VAEエンコーディング（CPU/GPUモードに応じて）
	if not cpu_fallback_mode:
	start_latent = vae_encode(input_image_pt, vae)
	else:
	# CPUモードでのVAEエンコーディング
	vae = vae.to(cpu)
	# CPU用に精度やバッチサイズを調整
	input_image_pt_cpu = input_image_pt.to(cpu, dtype=torch.float32)
	start_latent = vae_encode(input_image_pt_cpu, vae)

	# CLIP Vision
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "CLIP Visionエンコーディング中 ..."))))

	# 途中でGPU制限を超えた場合のチェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("CLIP Vision処理中にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	image_encoder = models['image_encoder']

	if not cpu_fallback_mode and not high_vram:
	load_model_as_complete(image_encoder, target_device=target_device)

	# CLIP Visionエンコーディング（CPU/GPUモードに応じて）
	if not cpu_fallback_mode:
	image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
	else:
	# CPUモードでのCLIP Visionエンコーディング
	image_encoder = image_encoder.to(cpu)
	image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)

	image_encoder_last_hidden_state = image_encoder_output.last_hidden_state

	# データ型の変換（CPU/GPUモードに応じて）
	if not cpu_fallback_mode:
	# GPUモードでの型変換
	llama_vec = llama_vec.to(torch.bfloat16)
	llama_vec_n = llama_vec_n.to(torch.bfloat16)
	clip_l_pooler = clip_l_pooler.to(torch.bfloat16)
	clip_l_pooler_n = clip_l_pooler_n.to(torch.bfloat16)
	image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(torch.bfloat16)
	else:
	# CPUモードではfloat32を維持
	llama_vec = llama_vec.to(torch.float32)
	llama_vec_n = llama_vec_n.to(torch.float32)
	clip_l_pooler = clip_l_pooler.to(torch.float32)
	clip_l_pooler_n = clip_l_pooler_n.to(torch.float32)
	image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(torch.float32)

	# Transformerモデルの準備
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "Transformerモデル準備中 ..."))))

	# 途中でGPU制限を超えた場合のチェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("Transformer準備中にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	transformer = models['transformer']

	if not cpu_fallback_mode:
	# GPUモード
	if not high_vram:
	if IN_HF_SPACE:
	# Hugging Face Space環境でのメモリ管理
	move_model_to_device_with_memory_preservation(
	transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation
	)
	else:
	# 通常環境でのメモリ管理
	DynamicSwapInstaller.install_model(transformer, device=gpu)
	else:
	transformer.to(gpu)
	else:
	# CPUモード
	transformer = transformer.to(cpu)

	# Sampling
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "サンプリング開始 ..."))))

	rnd = torch.Generator("cpu").manual_seed(seed)

	# 1フレーム推論のための設定
	num_frames = 1
	print(f"1フレーム推論モード: num_frames = {num_frames}")

	# CPU/GPUモードに応じた設定
	device_to_use = cpu if cpu_fallback_mode else gpu
	dtype_to_use = torch.float32 if cpu_fallback_mode else torch.bfloat16

	history_latents = torch.zeros(size=(1, 16, 1 + 2 + 16, target_height // 8, target_width // 8), dtype=torch.float32).cpu()
	history_pixels = None
	total_generated_latent_frames = 0

	# 1フレーム推論処理
	for latent_padding in latent_paddings:
	is_last_section = latent_padding == 0
	latent_padding_size = latent_padding * latent_window_size

	if stream.input_queue.top() == "end":
	stream.output_queue.push(("end", None))
	cleanup_temp_files(temp_dir) # 終了時に一時ファイル削除
	return

	print(f"latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}")

	indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0)
	(
	clean_latent_indices_pre,
	blank_indices,
	latent_indices,
	clean_latent_indices_post,
	clean_latent_2x_indices,
	clean_latent_4x_indices,
	) = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1)
	clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1)

	clean_latents_pre = start_latent.to(history_latents)
	clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, : 1 + 2 + 16, :, :].split(
	[1, 2, 16], dim=2
	)
	clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)

	# 1フレーム推論用の設定
	latent_indices = latent_indices[:, -1:]
	print(f"latent_indices = {latent_indices}")

	# 2xと4xは空に設定
	clean_latent_2x_indices = None
	clean_latent_4x_indices = None
	clean_latents_2x = None
	clean_latents_4x = None

	# GPU使用時のメモリ管理の最適化
	if not cpu_fallback_mode and not high_vram:
	unload_complete_models()
	move_model_to_device_with_memory_preservation(
	transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation
	)

	if use_teacache and not cpu_fallback_mode:
	transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
	else:
	transformer.initialize_teacache(enable_teacache=False)

	def callback(d):
	preview = d["denoised"]
	preview = vae_decode_fake(preview)

	preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
	preview = einops.rearrange(preview, "b c t h w -> (b h) (t w) c")

	if stream.input_queue.top() == "end":
	stream.output_queue.push(("end", None))
	cleanup_temp_files(temp_dir) # 終了時に一時ファイル削除
	raise KeyboardInterrupt("ユーザーがタスクを終了しました。")

	current_step = d["i"] + 1
	percentage = int(100.0 * current_step / steps)
	hint = f"サンプリング中 {current_step}/{steps}"
	desc = f"フレーム生成中: {current_step}/{steps} ({percentage}%)"
	stream.output_queue.push(("progress", (preview, desc, make_progress_bar_html(percentage, hint))))
	return

	# 途中でGPU制限を超えた場合のチェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("サンプリング前にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	transformer = models['transformer']
	transformer = transformer.to(cpu)
	device_to_use = cpu
	dtype_to_use = torch.float32

	# 適切な設定でサンプリング実行
	try:
	# CPUモードでステップ数を減らす（速度向上のため）
	actual_steps = min(steps, 15) if cpu_fallback_mode else steps

	generated_latents = sample_hunyuan(
	transformer=transformer,
	sampler="unipc",
	width=target_width,
	height=target_height,
	frames=num_frames,
	real_guidance_scale=cfg,
	distilled_guidance_scale=gs,
	guidance_rescale=rs,
	# shift=3.0,
	num_inference_steps=actual_steps,
	generator=rnd,
	prompt_embeds=llama_vec,
	prompt_embeds_mask=llama_attention_mask,
	prompt_poolers=clip_l_pooler,
	negative_prompt_embeds=llama_vec_n,
	negative_prompt_embeds_mask=llama_attention_mask_n,
	negative_prompt_poolers=clip_l_pooler_n,
	device=device_to_use,
	dtype=dtype_to_use,
	image_embeddings=image_encoder_last_hidden_state,
	latent_indices=latent_indices,
	clean_latents=clean_latents,
	clean_latent_indices=clean_latent_indices,
	clean_latents_2x=clean_latents_2x,
	clean_latent_2x_indices=clean_latent_2x_indices,
	clean_latents_4x=clean_latents_4x,
	clean_latent_4x_indices=clean_latent_4x_indices,
	callback=callback,
	)
	except Exception as e:
	if "CUDA" in str(e) or "GPU" in str(e) or "ZeroGPU quota exceeded" in str(e):
	print(f"サンプリング中にGPU関連エラーが発生: {e}")
	print("CPUモードに切り替えて再試行します")

	# CPUモードに切り替え
	cpu_fallback_mode = True

	# モデルをCPUに移動
	transformer = transformer.to(cpu)

	# CPUモード用のパラメータ設定
	device_to_use = cpu
	dtype_to_use = torch.float32

	# CPUモードでステップ数を減らす
	actual_steps = min(steps, 15)

	# CPUモードで再試行
	generated_latents = sample_hunyuan(
	transformer=transformer,
	sampler="unipc",
	width=target_width,
	height=target_height,
	frames=num_frames,
	real_guidance_scale=cfg,
	distilled_guidance_scale=gs,
	guidance_rescale=rs,
	num_inference_steps=actual_steps,
	generator=rnd,
	prompt_embeds=llama_vec.to(torch.float32),
	prompt_embeds_mask=llama_attention_mask,
	prompt_poolers=clip_l_pooler.to(torch.float32),
	negative_prompt_embeds=llama_vec_n.to(torch.float32),
	negative_prompt_embeds_mask=llama_attention_mask_n,
	negative_prompt_poolers=clip_l_pooler_n.to(torch.float32),
	device=device_to_use,
	dtype=dtype_to_use,
	image_embeddings=image_encoder_last_hidden_state.to(torch.float32),
	latent_indices=latent_indices,
	clean_latents=clean_latents,
	clean_latent_indices=clean_latent_indices,
	clean_latents_2x=clean_latents_2x,
	clean_latent_2x_indices=clean_latent_2x_indices,
	clean_latents_4x=clean_latents_4x,
	clean_latent_4x_indices=clean_latent_4x_indices,
	callback=callback,
	)
	else:
	# GPU関連以外のエラーは再スロー
	raise

	print(f"generated_latents.shape = {generated_latents.shape}")

	total_generated_latent_frames += int(generated_latents.shape[2])
	history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)

	# メモリ管理の最適化（GPUモードのみ）
	if not cpu_fallback_mode and not high_vram:
	offload_model_from_device_for_memory_preservation(transformer, target_device=gpu, preserved_memory_gb=8)
	load_model_as_complete(vae, target_device=gpu)

	real_history_latents =history_latents[:, :, :total_generated_latent_frames, :, :]

	# VAEデコード
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(70, "VAEデコード中 ..."))))

	# VAEデコード前にGPU制限超過チェック
	if check_gpu_quota_exceeded():
	cpu_fallback_mode = True
	print("VAEデコード前にGPU制限を超えました。CPUモードに切り替えます。")
	# モデルを再度取得
	models = get_models()
	vae = models['vae']
	vae = vae.to(cpu)

	if history_pixels is None:
	if not cpu_fallback_mode:
	history_pixels = vae_decode(real_history_latents, vae).cpu()
	else:
	# CPUモードでのVAEデコード
	vae = vae.to(cpu)
	history_pixels = vae_decode(real_history_latents.to(cpu, dtype=torch.float32), vae).cpu()
	else:
	section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2)
	overlapped_frames = latent_window_size * 4 - 3

	if not cpu_fallback_mode:
	current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
	else:
	# CPUモードでのVAEデコード
	vae = vae.to(cpu)
	current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames].to(cpu, dtype=torch.float32), vae).cpu()

	history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)

	if not cpu_fallback_mode and not high_vram:
	unload_complete_models()

	# 一時フォルダにMP4保存とフレーム抽出
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(80, "動画保存中 ..."))))

	output_path, extracted_frames = save_bcthw_as_mp4_with_frames(
	history_pixels, temp_dir, job_id, fps=30, crf=mp4_crf
	)

	# 生成情報
	resize_info = {
	"original_width": original_width,
	"original_height": original_height,
	"target_width": target_width,
	"target_height": target_height
	}

	print(f"デコード完了。現在の潜在変数形状 {real_history_latents.shape}; ピクセル形状 {history_pixels.shape}")
	print(f"MP4から {len(extracted_frames)} フレームを抽出しました")

	# 元の解像度にリサイズした結果フレームを作成
	stream.output_queue.push(("progress", (None, "", make_progress_bar_html(90, "リサイズ中 ..."))))

	if resize_info:
	print(f"原寸サイズに戻します: {resize_info['original_width']}x{resize_info['original_height']}")
	resized_frames = []

	for frame_path in extracted_frames:
	# フレームを読み込み
	frame = Image.open(frame_path)
	# 元のサイズにリサイズ
	resized_frame = frame.resize((resize_info['original_width'], resize_info['original_height']), Image.LANCZOS)
	# 一時ファイルに保存
	resized_path = frame_path.replace(".png", "_resized.png")
	resized_frame.save(resized_path)
	resized_frames.append(resized_path)

	# 元のサイズに戻したフレームのリストを使用
	extracted_frames = resized_frames

	# 最後のフレームのパスを取得
	last_frame_path = extracted_frames[0] if extracted_frames else None

	# 結果を送信
	stream.output_queue.push(("file", (output_path, last_frame_path, resize_info, temp_dir)))

	if is_last_section:
	break
	except Exception as e:
	traceback.print_exc()
	print(f"エラーが発生しました: {str(e)}")

	if not cpu_fallback_mode and not high_vram:
	try:
	unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae, transformer)
	except:
	pass

	# エラー発生時も一時ファイルを削除
	cleanup_temp_files(temp_dir)

	# エラーメッセージを送信
	stream.output_queue.push(("error", str(e)))

	stream.output_queue.push(("end", None))
	return

	# Unified start-button handler. It is defined once for Spaces (GPU with CPU
	# fallback) and once for every other environment (CPU only); both variants
	# share the private helpers below so that they cannot drift apart.
	_MASK_FADEOUT_PROMPT = "A yellow and purple checkerboard mask fades away, smoothly revealing the background beneath, while the areas not covered by the mask remain completely still."
	_MASK_FADEOUT_LORA_PATH = "./LoRA/mask_fadeout_V1.safetensors"


	def _ui_outputs(preview, result, desc, bar, running, mode_text):
	    """Build the 7-tuple of updates expected by the start button's outputs.

	    Order: preview_image, result_frame, progress_desc, progress_bar,
	    start_button, end_button, mode_info. While a job is *running* the start
	    button is disabled and the end button enabled; otherwise the reverse.
	    """
	    return (
	        preview,
	        result,
	        desc,
	        bar,
	        gr.update(interactive=not running),  # start_button
	        gr.update(interactive=running),  # end_button
	        mode_text,
	    )


	def _resolve_lora_file():
	    """Return the bundled LoRA path, or None (after a warning) if it is missing."""
	    lora_file = _MASK_FADEOUT_LORA_PATH
	    if not os.path.exists(lora_file):
	        print(f"警告: LoRAファイル {lora_file} が見つかりません。LoRAなしで処理を続行します。")
	        return None
	    return lora_file


	def _load_result_frame(frame_path):
	    """Return the frame at *frame_path* as an in-memory PIL image, or None.

	    None is returned when no frame was produced (e.g. the job was cancelled
	    before the first result) or the file no longer exists. The image is copied
	    into memory so the temporary directory can be deleted afterwards.
	    """
	    if not frame_path or not os.path.exists(frame_path):
	        return None
	    with Image.open(frame_path) as frame:
	        return frame.copy()


	def _stream_generation(image_mask_dict, lora_multiplier, steps, lora_file, mode_label):
	    """Start the worker and translate its queue events into UI update tuples.

	    Args:
	        image_mask_dict: value of the ImageMask component (must not be None).
	        lora_multiplier: LoRA strength forwarded to the worker.
	        steps: number of sampling steps requested from the worker.
	        lora_file: LoRA path, or None to run without LoRA.
	        mode_label: callable mapping "running" / "error" / "done" to the text
	            shown in mode_info; called at every update so it can reflect a
	            mode change that happens while the job runs.

	    Yields:
	        7-tuples as built by ``_ui_outputs``.

	    Raises:
	        Exception: anything raised while consuming worker events is logged and
	            re-raised. The temporary directory is removed in every case.
	    """
	    global stream

	    # Fixed generation parameters
	    prompt = _MASK_FADEOUT_PROMPT
	    n_prompt = None
	    seed = 1234
	    cfg = 1.0
	    gs = 10.0
	    rs = 0.0
	    gpu_memory_preservation = 6.0
	    use_teacache = False
	    mp4_crf = 0
	    fp8_optimization = False

	    # Launch the asynchronous worker
	    stream = AsyncStream()
	    async_run(
	        worker_with_temp_files,
	        image_mask_dict,
	        prompt,
	        n_prompt,
	        seed,
	        steps,
	        cfg,
	        gs,
	        rs,
	        gpu_memory_preservation,
	        use_teacache,
	        mp4_crf,
	        lora_file,
	        lora_multiplier,
	        fp8_optimization,
	    )

	    temp_dir = None
	    last_frame_path = None

	    try:
	        while True:
	            flag, data = stream.output_queue.next()

	            if flag == "file":
	                # A finished result arrived: show its frame, keep the job "running"
	                _output_path, last_frame_path, _resize_info, temp_dir = data
	                frame = _load_result_frame(last_frame_path)
	                yield _ui_outputs(
	                    gr.update(visible=True),  # keep the previous preview, if any
	                    gr.update(visible=frame is not None, value=frame),
	                    gr.update(),
	                    gr.update(),
	                    True,
	                    mode_label("running"),
	                )

	            elif flag == "progress":
	                preview, desc, html = data
	                yield _ui_outputs(
	                    gr.update(visible=True, value=preview),  # progress thumbnail
	                    gr.update(visible=False),  # hide the result while sampling
	                    desc,
	                    html,
	                    True,
	                    mode_label("running"),
	                )

	            elif flag == "error":
	                yield _ui_outputs(
	                    gr.update(visible=False),
	                    gr.update(visible=False),
	                    data,  # error message goes into progress_desc
	                    "",
	                    False,
	                    mode_label("error"),
	                )
	                break

	            elif flag == "end":
	                # The worker also sends "end" when the user cancels before any
	                # frame exists; in that case there is simply nothing to show.
	                frame = _load_result_frame(last_frame_path)
	                yield _ui_outputs(
	                    gr.update(visible=False),
	                    gr.update(visible=frame is not None, value=frame),
	                    "",
	                    "",
	                    False,
	                    mode_label("done"),
	                )
	                break

	    except Exception as e:
	        print(f"処理中にエラーが発生しました: {e}")
	        raise
	    finally:
	        # Single cleanup point for the error, end and exception paths
	        if temp_dir and os.path.exists(temp_dir):
	            cleanup_temp_files(temp_dir)


	if IN_HF_SPACE and 'spaces' in globals():
	    def _mode_label(status):
	        """Mode text for Spaces; reflects the current CPU-fallback flag."""
	        label = f"実行モード: {'CPU' if cpu_fallback_mode else 'GPU'}"
	        if status == "error":
	            return label + " (エラー発生)"
	        if status == "done":
	            return label + " (完了)"
	        return label

	    @spaces.GPU(duration=120)
	    def process_with_temp(image_mask_dict, lora_multiplier=1.0):
	        """Main flow using temporary files (Spaces: GPU with CPU fallback).

	        Generator used as the start button handler; every yielded value is a
	        7-tuple of updates for the components wired as its outputs.
	        """
	        global cpu_fallback_mode

	        # Check the GPU state up front
	        check_gpu_status()

	        # An image is required. This function is a generator, so the message
	        # must be yielded; a plain `return value` would never reach the UI.
	        if image_mask_dict is None:
	            yield _ui_outputs(
	                gr.update(visible=False),
	                gr.update(visible=False),
	                "画像をアップロードしてマスクを描画してください",
	                "",
	                False,
	                _mode_label("running"),
	            )
	            return

	        # Reset the UI at the start of processing
	        yield _ui_outputs(
	            gr.update(visible=False),
	            gr.update(visible=False),
	            "",
	            "",
	            True,
	            _mode_label("running"),
	        )

	        lora_file = _resolve_lora_file()

	        try:
	            # Fall back to CPU when the GPU quota is already used up
	            if check_gpu_quota_exceeded():
	                print("GPU使用制限を超えているため、CPUモードで実行します")
	                cpu_fallback_mode = True
	        except Exception as e:
	            print(f"GPU確認中にエラーが発生しました: {e}")

	        print(f"実行モード: {'CPU' if cpu_fallback_mode else 'GPU'}")

	        # 25 steps requested; the worker lowers this itself in CPU mode
	        yield from _stream_generation(image_mask_dict, lora_multiplier, 25, lora_file, _mode_label)
	else:
	    _CPU_MODE_LABELS = {
	        "running": "実行モード: CPU (通常環境)",
	        "error": "実行モード: CPU (エラー発生)",
	        "done": "実行モード: CPU (完了)",
	    }

	    def _mode_label(status):
	        """Mode text for non-Spaces environments (always CPU)."""
	        return _CPU_MODE_LABELS[status]

	    # Handler for non-Spaces environments
	    def process_with_temp(image_mask_dict, lora_multiplier=1.0):
	        """Main flow using temporary files (non-Spaces: CPU only).

	        Generator used as the start button handler; every yielded value is a
	        7-tuple of updates for the components wired as its outputs.
	        """
	        global cpu_fallback_mode

	        # CPU mode is forced outside Spaces
	        cpu_fallback_mode = True

	        # An image is required. This function is a generator, so the message
	        # must be yielded; a plain `return value` would never reach the UI.
	        if image_mask_dict is None:
	            yield _ui_outputs(
	                gr.update(visible=False),
	                gr.update(visible=False),
	                "画像をアップロードしてマスクを描画してください",
	                "",
	                False,
	                _mode_label("running"),
	            )
	            return

	        # Reset the UI at the start of processing
	        yield _ui_outputs(
	            gr.update(visible=False),
	            gr.update(visible=False),
	            "",
	            "",
	            True,
	            _mode_label("running"),
	        )

	        lora_file = _resolve_lora_file()

	        # Fewer steps on CPU-only environments
	        yield from _stream_generation(image_mask_dict, lora_multiplier, 15, lora_file, _mode_label)


	# Explicitly release GPU memory when processing ends
	def cleanup_gpu_resources():
	    """Move every loaded model to the CPU and release cached GPU memory.

	    Reads the module-level ``models`` mapping (name -> model). A missing or
	    empty mapping is treated as "nothing loaded" instead of raising. Failures
	    for individual models, or while clearing the cache, are printed and do not
	    propagate, so this is safe to call from UI handlers.
	    """
	    # `models` may not have been created yet when cancel is pressed early
	    loaded_models = globals().get("models") or {}

	    # Move all models to the CPU
	    for model_name, model in loaded_models.items():
	        try:
	            if model is not None and callable(getattr(model, "to", None)):
	                model.to('cpu')
	                print(f"{model_name}をCPUに移動しました")
	        except Exception as e:
	            print(f"{model_name}のCPU移動中にエラー: {e}")

	    # Collect garbage first: empty_cache() can only return blocks that are no
	    # longer referenced, so unreachable tensors have to be freed beforehand.
	    try:
	        gc.collect()
	        torch.cuda.empty_cache()
	        print("GPUキャッシュをクリアしました")
	    except Exception as e:
	        print(f"GPUキャッシュクリア中にエラー: {e}")

	# Cancel / cleanup handler (shared by GPU and CPU modes)
	def end_process_with_cleanup():
	    """Cancel the running job, release GPU resources and delete temporary data.

	    Pushes ``"end"`` to ``stream.input_queue`` when a stream exists, calls
	    ``cleanup_gpu_resources()``, then removes every ``hunyuan_temp_*`` entry in
	    the system temporary directory. Deletion failures are printed and do not
	    propagate.
	    """
	    # Signal the worker before the slower cleanup so cancellation is not
	    # delayed by moving models off the GPU.
	    if stream is not None:
	        stream.input_queue.push("end")
	        print("処理を中止しました")

	    cleanup_gpu_resources()

	    # Remove all leftover temporary directories (just in case)
	    temp_pattern = os.path.join(tempfile.gettempdir(), "hunyuan_temp_*")
	    for dir_path in glob.glob(temp_pattern):
	        try:
	            shutil.rmtree(dir_path)
	        except FileNotFoundError:
	            # Already removed by someone else between glob() and rmtree()
	            continue
	        except Exception as e:
	            print(f"一時ディレクトリの削除中にエラーが発生しました: {e}")
	        else:
	            print(f"一時ディレクトリを削除しました: {dir_path}")

	# Build the Gradio UI: an image/mask editor with controls, plus the
	# preview, result and progress widgets
	css = make_progress_bar_css()
	block = gr.Blocks(css=css).queue()
	with block:
	gr.Markdown("# FramePackI2V_HY_mask_fadeout - 画像のマスクした部分を除去")
	with gr.Row():
	with gr.Column():
	# The input image and its mask are provided through an ImageMask component
	image_mask = gr.ImageMask(
	label="画像をアップロードしてマスクを描画",
	type="pil",
	brush=gr.Brush(
	colors=["#FF00FF", "#FFFFFF", "#000000", "#FF0000", "#00FF00", "#0000FF", "#FFFF00"],
	default_color="#FF00FF",
	color_mode="defaults"
	),
	layers=True,
	height="70vh",
	width="60vh",
	)

	# Start / cancel buttons; cancel stays disabled until a job is running
	with gr.Row():
	start_button = gr.Button(value="生成開始")
	end_button = gr.Button(value="生成中止", interactive=False)

	with gr.Group():
	lora_multiplier = gr.Slider(label="LoRA倍率", minimum=0.0, maximum=2.0, value=1.0, step=0.1)

	# Shows the execution mode (CPU or GPU) as known when the UI is built
	with gr.Group():
	mode_info = gr.Markdown(f"実行モード: {'CPU' if cpu_fallback_mode else 'GPU'}")

	# Preview and result images start hidden; the handler reveals them
	with gr.Column():
	preview_image = gr.Image(label="生成プレビュー", visible=False)
	result_frame = gr.Image(label="生成結果", visible=False, type="pil", height="60vh")

	progress_desc = gr.Markdown("", elem_classes="no-generating-animation")
	progress_bar = gr.HTML("", elem_classes="no-generating-animation")

	# Inputs passed to process_with_temp, in parameter order
	ips = [
	    image_mask,
	    lora_multiplier,
	]

	# Components updated by each tuple that process_with_temp yields, in tuple
	# order. The list is the same for Spaces and non-Spaces environments, so no
	# environment check is needed here.
	ops = [
	    preview_image,
	    result_frame,
	    progress_desc,
	    progress_bar,
	    start_button,
	    end_button,
	    mode_info,
	]

	start_button.click(fn=process_with_temp, inputs=ips, outputs=ops)
	end_button.click(fn=end_process_with_cleanup)

	# Application launch helper
	def launch_app():
	    """Start the Gradio server for the UI held in ``block``.

	    Launches with server_name "0.0.0.0", without a share link and without
	    opening a browser. No exception is caught here; anything raised by
	    ``block.launch`` propagates to the caller.
	    """
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "share": False,
	        "inbrowser": False,
	    }
	    block.launch(**launch_options)


	# Start the application
	launch_app()