Spaces:

ghostai1
/

GhostPack

Running

App Files Files Community

GhostPack / app.py

ghostai1

Update app.py

6e6a051 verified 15 days ago

raw

history blame contribute delete

22.2 kB

	#!/usr/bin/env python3
	# ---------------------------------------------------------------------------
	# RELEASE – GhostPack Video Generator (📸 Showcase Tab Polished)
	# ---------------------------------------------------------------------------
	import os, sys, argparse, random, traceback, json, logging
	import numpy as np, torch, einops, gradio as gr
	from PIL import Image
	from diffusers import AutoencoderKLHunyuanVideo
	from transformers import (
	LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer,
	SiglipImageProcessor, SiglipVisionModel,
	)
	from diffusers_helper.hunyuan import (
	encode_prompt_conds, vae_encode, vae_decode, vae_decode_fake,
	)
	from diffusers_helper.utils import (
	save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw,
	resize_and_center_crop, generate_timestamp,
	)
	from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
	from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
	from diffusers_helper.memory import (
	gpu, get_cuda_free_memory_gb, DynamicSwapInstaller,
	unload_complete_models, load_model_as_complete,
	fake_diffusers_current_device, move_model_to_device_with_memory_preservation,
	offload_model_from_device_for_memory_preservation,
	)
	from diffusers_helper.thread_utils import AsyncStream, async_run
	from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
	from diffusers_helper.clip_vision import hf_clip_vision_encode
	from diffusers_helper.bucket_tools import find_nearest_bucket

	# ---------------------------------------------------------------------------
	# SETUP LOGGING & TCMALLOC DETECTION
	# ---------------------------------------------------------------------------
	# Directory that holds this script; runtime artefacts are placed relative to it.
	BASE = os.path.abspath(os.path.dirname(__file__))
	LOG_PATH = os.path.join(BASE, "ghostpack.log")
	# Every record goes both to the log file and to stdout.
	_log_handlers = [
	    logging.FileHandler(LOG_PATH),
	    logging.StreamHandler(sys.stdout),
	]
	logging.basicConfig(
	    level=logging.INFO,
	    format='[%(asctime)s] %(levelname)s - %(message)s',
	    handlers=_log_handlers,
	)
	logger = logging.getLogger(__name__)

	def detect_tcmalloc():
	    """Report whether the string ``tcmalloc`` appears in ``/proc/self/maps``.

	    Returns ``False`` when the maps file cannot be opened or read.
	    """
	    try:
	        with open('/proc/self/maps', 'r') as maps_file:
	            mappings = maps_file.read()
	    except Exception:
	        return False
	    return 'tcmalloc' in mappings

	# Record at start-up whether tcmalloc was found in the process mappings.
	logger.info("TCMalloc loaded: %s", detect_tcmalloc())

	# ---------------------------------------------------------------------------
	# SETTINGS
	# ---------------------------------------------------------------------------
	SETTINGS_PATH = os.path.join(BASE, "settings.json")
	# Built-in fallback for every tunable the UI reads from ``settings``.
	default_settings = {
	    "latent_window": 9,
	    "cfg_scale": 1.0,
	    "cfg_rescale": 0.0,
	    "gpu_keep": 6.0,
	    "enable_teacache": True,
	    "mp4_crf": 16,
	}
	# Start from a copy of the defaults (so the template is never aliased) and
	# overlay whatever was saved; keys missing from the file keep their default
	# instead of raising KeyError when the UI looks them up.
	settings = dict(default_settings)
	try:
	    with open(SETTINGS_PATH, "r") as f:
	        loaded = json.load(f)
	except (OSError, ValueError):
	    # Missing/unreadable file, or content that is not valid JSON
	    # (json.JSONDecodeError is a ValueError): fall back to the defaults.
	    loaded = None
	if isinstance(loaded, dict):
	    settings.update(loaded)

	def save_settings(lw, cs, cr, gk, teac, crf):
	    """Write the six tunables to ``SETTINGS_PATH`` as indented JSON.

	    Returns the confirmation text that the Settings tab displays.
	    """
	    keys = (
	        "latent_window",
	        "cfg_scale",
	        "cfg_rescale",
	        "gpu_keep",
	        "enable_teacache",
	        "mp4_crf",
	    )
	    payload = dict(zip(keys, (lw, cs, cr, gk, teac, crf)))
	    with open(SETTINGS_PATH, "w") as out_file:
	        json.dump(payload, out_file, indent=2)
	    return "✅ Settings saved!"

	def load_logs():
	    """Return the whole log file as text, or a placeholder when it does not exist."""
	    if not os.path.exists(LOG_PATH):
	        return "No logs found."
	    with open(LOG_PATH, "r") as log_file:
	        contents = log_file.read()
	    return contents

	# ---------------------------------------------------------------------------
	# ENV / CACHE
	# ---------------------------------------------------------------------------
	# Local download directory shared by all Hugging Face cache variables.
	# NOTE(review): these variables are assigned after transformers/diffusers
	# were imported at the top of the file - confirm the libraries still pick
	# them up at this point.
	CACHE = os.path.join(BASE, "hf_download")
	os.makedirs(CACHE, exist_ok=True)
	os.environ.update(
	    dict.fromkeys(("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE"), CACHE)
	)
	os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

	# ---------------------------------------------------------------------------
	# CLI
	# ---------------------------------------------------------------------------
	# Command-line options; they are forwarded to ``launch()`` in run_ui.
	ap = argparse.ArgumentParser()
	for _flag, _options in (
	    ("--share", {"action": "store_true"}),
	    ("--server", {"default": "0.0.0.0"}),
	    ("--port", {"type": int, "default": 7860}),
	    ("--inbrowser", {"action": "store_true"}),
	):
	    ap.add_argument(_flag, **_options)
	args = ap.parse_args()

	# ---------------------------------------------------------------------------
	# VRAM
	# ---------------------------------------------------------------------------
	# Free GPU memory (GB) as reported by the helper at start-up.
	free_gb = get_cuda_free_memory_gb(gpu)
	# Above 60 GB free the rest of the file keeps every model on the GPU;
	# otherwise models are swapped/offloaded on demand.
	hi_vram = free_gb > 60
	# The separator is a plain "|"; the previous "\|" was an invalid escape
	# sequence that logged a stray backslash and triggers a syntax warning.
	logger.info(f"[GhostPack] Free VRAM: {free_gb:.1f} GB | High-VRAM: {hi_vram}")

	# ---------------------------------------------------------------------------
	# MODEL LOAD
	# ---------------------------------------------------------------------------
	def llm(sf):
	    """Load the Llama text encoder from subfolder ``sf`` of the HunyuanVideo repo.

	    Weights are requested as float16; the model is moved to the CPU and put
	    into eval mode before being returned.
	    """
	    model = LlamaModel.from_pretrained(
	        "hunyuanvideo-community/HunyuanVideo",
	        subfolder=sf,
	        torch_dtype=torch.float16,
	    )
	    return model.cpu().eval()

	def clip(sf):
	    """Load the CLIP text encoder from subfolder ``sf`` of the HunyuanVideo repo.

	    Weights are requested as float16; the model is moved to the CPU and put
	    into eval mode before being returned.
	    """
	    model = CLIPTextModel.from_pretrained(
	        "hunyuanvideo-community/HunyuanVideo",
	        subfolder=sf,
	        torch_dtype=torch.float16,
	    )
	    return model.cpu().eval()

	# Hub repositories the components are pulled from.
	_HUNYUAN_REPO = "hunyuanvideo-community/HunyuanVideo"
	_REDUX_REPO = "lllyasviel/flux_redux_bfl"
	# Text encoders and their tokenizers.
	text_enc = llm("text_encoder")
	text_enc2 = clip("text_encoder_2")
	tok = LlamaTokenizerFast.from_pretrained(_HUNYUAN_REPO, subfolder="tokenizer")
	tok2 = CLIPTokenizer.from_pretrained(_HUNYUAN_REPO, subfolder="tokenizer_2")
	# Video VAE (float16, CPU, eval).
	vae = AutoencoderKLHunyuanVideo.from_pretrained(
	    _HUNYUAN_REPO,
	    subfolder="vae",
	    torch_dtype=torch.float16,
	)
	vae = vae.cpu().eval()
	# Image feature extractor and vision encoder (float16, CPU, eval).
	feat_ext = SiglipImageProcessor.from_pretrained(_REDUX_REPO, subfolder="feature_extractor")
	img_enc = SiglipVisionModel.from_pretrained(
	    _REDUX_REPO,
	    subfolder="image_encoder",
	    torch_dtype=torch.float16,
	)
	img_enc = img_enc.cpu().eval()
	# Packed video transformer (bfloat16, CPU, eval).
	trans = HunyuanVideoTransformer3DModelPacked.from_pretrained(
	    "lllyasviel/FramePackI2V_HY",
	    torch_dtype=torch.bfloat16,
	)
	trans = trans.cpu().eval()
	trans.high_quality_fp32_output_for_inference = True

	if hi_vram:
	    # Enough memory: keep every model on the GPU.
	    for _model in (text_enc, text_enc2, img_enc, vae, trans):
	        _model.to(gpu)
	else:
	    # Limited memory: let the VAE work in slices and tiles.
	    vae.enable_slicing()
	    vae.enable_tiling()
	# Pin dtypes: bfloat16 for the transformer, float16 for everything else.
	trans.to(dtype=torch.bfloat16)
	for _model in (vae, img_enc, text_enc, text_enc2):
	    _model.to(dtype=torch.float16)
	# Inference only: no gradients for any model.
	for _model in (vae, img_enc, text_enc, text_enc2, trans):
	    _model.requires_grad_(False)
	if not hi_vram:
	    # Install dynamic swapping for the transformer and the Llama text encoder.
	    DynamicSwapInstaller.install_model(trans, device=gpu)
	    DynamicSwapInstaller.install_model(text_enc, device=gpu)

	# Queue pair shared by the worker and the UI; replaced for every new job.
	stream = AsyncStream()
	# Folder that receives the saved input image and the rendered MP4 files.
	OUT = os.path.join(BASE, "outputs")
	os.makedirs(OUT, exist_ok=True)

	# ---------------------------------------------------------------------------
	# DEMO IMAGE
	# ---------------------------------------------------------------------------
	# Optional demo picture preloaded into the input widget; None when absent.
	demo_path = os.path.join(BASE, "img", "demo.png")
	if os.path.exists(demo_path):
	    demo_np = np.array(Image.open(demo_path).convert("RGB"))
	else:
	    demo_np = None

	# ---------------------------------------------------------------------------
	# WORKER & WRAPPER
	# ---------------------------------------------------------------------------
	@torch.no_grad()
	def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
	    """Generate a video from one image and report progress on the global ``stream``.

	    Parameters:
	        img:  input image as an array with shape ``(H, W, channels)``.
	        p, n_p: positive and negative prompt; ``n_p`` is only encoded when
	            ``cfg`` is not 1, otherwise zero tensors are used instead.
	        sd:   seed for the CPU generator; ``-1`` picks a random seed.
	        secs: requested length in seconds; the section count is
	            ``round(secs * 30 / (win * 4))``, at least 1.
	        win:  latent window size; each section samples ``win * 4 - 3`` frames.
	        stp:  number of inference steps.
	        cfg, gsc, rsc: real guidance scale, distilled guidance scale and
	            guidance rescale forwarded to ``sample_hunyuan``.
	        keep: value forwarded to ``move_model_to_device_with_memory_preservation``.
	        tea:  passed to ``trans.initialize_teacache`` together with ``stp``.
	        crf:  CRF value used when writing the MP4.

	    Messages pushed to ``stream.output_queue``:
	        ``("progress", (preview, description, html))`` during sampling,
	        ``("file", path)`` after every section, and ``("end", None)`` when the
	        job finishes, fails or is cancelled.
	    """
	    if sd == -1:
	        sd = random.randint(0, 2**31 - 1)
	    sections = max(round((secs * 30) / (win * 4)), 1)
	    job = generate_timestamp()
	    stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "Start"))))
	    try:
	        # --- prompt encoding -------------------------------------------------
	        if not hi_vram:
	            unload_complete_models(text_enc, text_enc2, img_enc, vae, trans)
	            fake_diffusers_current_device(text_enc, gpu)
	            load_model_as_complete(text_enc2, gpu)
	        lv, cp = encode_prompt_conds(p, text_enc, text_enc2, tok, tok2)
	        if cfg == 1:
	            lv_n, cp_n = torch.zeros_like(lv), torch.zeros_like(cp)
	        else:
	            lv_n, cp_n = encode_prompt_conds(n_p, text_enc, text_enc2, tok, tok2)
	        lv, m = crop_or_pad_yield_mask(lv, 512)
	        lv_n, m_n = crop_or_pad_yield_mask(lv_n, 512)

	        # --- input image -----------------------------------------------------
	        H, W, _ = img.shape
	        h, w = find_nearest_bucket(H, W, 640)
	        img_np = resize_and_center_crop(img, w, h)
	        Image.fromarray(img_np).save(os.path.join(OUT, f"{job}.png"))
	        # Scale pixel values from [0, 255] to [-1, 1] and reorder HWC -> (1, C, 1, H, W).
	        img_pt = torch.from_numpy(img_np).float() / 127.5 - 1
	        img_pt = img_pt.permute(2, 0, 1)[None, :, None]
	        if not hi_vram:
	            load_model_as_complete(vae, gpu)
	        start_lat = vae_encode(img_pt, vae)
	        if not hi_vram:
	            load_model_as_complete(img_enc, gpu)
	        img_hidden = hf_clip_vision_encode(img_np, feat_ext, img_enc).last_hidden_state

	        # All conditioning tensors use the transformer's dtype.
	        to = trans.dtype
	        lv, lv_n, cp, cp_n, img_hidden = [x.to(to) for x in (lv, lv_n, cp, cp_n, img_hidden)]

	        # --- sampling state --------------------------------------------------
	        gen = torch.Generator("cpu").manual_seed(sd)
	        frames = win * 4 - 3
	        # 19 = 1 + 2 + 16 history slots, split below into post / 2x / 4x context.
	        hist_lat = torch.zeros((1, 16, 19, h // 8, w // 8), dtype=torch.float32).cpu()
	        hist_px, total = None, 0
	        if sections > 4:
	            pad_seq = [3] + [2] * (sections - 3) + [1, 0]
	        else:
	            pad_seq = list(reversed(range(sections)))

	        for pad in pad_seq:
	            last = (pad == 0)
	            if stream.input_queue.top() == "end":
	                stream.output_queue.push(("end", None))
	                return
	            pad_sz = pad * win
	            idx = torch.arange(0, sum([1, pad_sz, win, 1, 2, 16])).unsqueeze(0)
	            a, b, c, d, e, f = idx.split([1, pad_sz, win, 1, 2, 16], 1)
	            clean_idx = torch.cat([a, d], 1)
	            pre = start_lat.to(hist_lat)
	            post, two, four = hist_lat[:, :, :19].split([1, 2, 16], 2)
	            clean = torch.cat([pre, post], 2)
	            if not hi_vram:
	                unload_complete_models()
	                move_model_to_device_with_memory_preservation(trans, gpu, keep)
	            trans.initialize_teacache(tea, stp)

	            def cb(d):
	                """Per-step sampler callback: push a preview and honour cancellation."""
	                pv = vae_decode_fake(d["denoised"])
	                pv = (pv * 255).cpu().numpy().clip(0, 255).astype(np.uint8)
	                pv = einops.rearrange(pv, "b c t h w->(b h)(t w)c")
	                cur = d["i"] + 1
	                # Clamp so the label never shows a negative frame count
	                # before the first section has been appended.
	                stream.output_queue.push(("progress", (pv, f"{max(0, total*4-3)}f",
	                    make_progress_bar_html(int(100*cur/stp), f"{cur}/{stp}"))))
	                if stream.input_queue.top() == "end":
	                    stream.output_queue.push(("end", None))
	                    # KeyboardInterrupt is not an Exception subclass, so it
	                    # bypasses the handler below; "end" was already pushed.
	                    raise KeyboardInterrupt

	            new_lat = sample_hunyuan(
	                transformer=trans, sampler="unipc", width=w, height=h, frames=frames,
	                real_guidance_scale=cfg, distilled_guidance_scale=gsc, guidance_rescale=rsc,
	                num_inference_steps=stp, generator=gen,
	                prompt_embeds=lv, prompt_embeds_mask=m, prompt_poolers=cp,
	                negative_prompt_embeds=lv_n, negative_prompt_embeds_mask=m_n, negative_prompt_poolers=cp_n,
	                device=gpu, dtype=torch.bfloat16, image_embeddings=img_hidden,
	                latent_indices=c, clean_latents=clean, clean_latent_indices=clean_idx,
	                clean_latents_2x=two, clean_latent_2x_indices=e,
	                clean_latents_4x=four, clean_latent_4x_indices=f,
	                callback=cb
	            )
	            if last:
	                # The final section is prefixed with the start-image latent.
	                new_lat = torch.cat([start_lat.to(new_lat), new_lat], 2)
	            total += new_lat.shape[2]
	            # New latents are prepended to the history.
	            hist_lat = torch.cat([new_lat.to(hist_lat), hist_lat], 2)
	            if not hi_vram:
	                offload_model_from_device_for_memory_preservation(trans, gpu, 8)
	                load_model_as_complete(vae, gpu)
	            real = hist_lat[:, :, :total]
	            if hist_px is None:
	                hist_px = vae_decode(real, vae).cpu()
	            else:
	                # Decode only the newest 2*win latent frames (+1 for the
	                # start latent on the last section). This previously used
	                # the undefined name ``win2`` and raised NameError.
	                sec_lat = win * 2 + 1 if last else win * 2
	                cur_px = vae_decode(real[:, :, :sec_lat], vae).cpu()
	                hist_px = soft_append_bcthw(cur_px, hist_px, win*4-3)
	            if not hi_vram:
	                unload_complete_models()
	            mp4 = os.path.join(OUT, f"{job}_{total}.mp4")
	            save_bcthw_as_mp4(hist_px, mp4, fps=30, crf=crf)
	            stream.output_queue.push(("file", mp4))
	            if last:
	                break
	    except Exception:
	        traceback.print_exc()
	    # Reached after a normal run and after a handled failure, so the UI
	    # loop always receives its terminating message.
	    stream.output_queue.push(("end", None))

	def wrapper(*args):
	    """Gradio entry point: relay every update produced by ``wrapper_logic``."""
	    updates = wrapper_logic(*args)
	    yield from updates

	def wrapper_logic(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
	    """Start ``worker`` on a fresh stream and translate its messages into UI updates.

	    Every yielded 7-tuple is (video, preview image, description, progress
	    HTML, log text, Start button state, End button state).

	    Raises:
	        gr.Error: when no input image was supplied.
	    """
	    global stream
	    if img is None:
	        raise gr.Error("Upload an image.")
	    # Reset the outputs, lock Start and unlock End while the job runs.
	    yield None, None, "", "", "", gr.update(interactive=False), gr.update(interactive=True)
	    stream = AsyncStream()
	    async_run(worker, img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf)
	    out = None
	    log = ""
	    while True:
	        flag, data = stream.output_queue.next()
	        if flag == "file":
	            # A new (longer) video file is available.
	            out = data
	            yield (out, gr.update(), gr.update(), gr.update(), log,
	                   gr.update(interactive=False), gr.update(interactive=True))
	        elif flag == "progress":
	            pv, desc, html = data
	            log = desc
	            yield (gr.update(), gr.update(visible=True, value=pv), desc, html, log,
	                   gr.update(interactive=False), gr.update(interactive=True))
	        elif flag == "end":
	            # Hide the preview and flip the buttons back, then stop.
	            yield (out, gr.update(visible=False), gr.update(), "", log,
	                   gr.update(interactive=True), gr.update(interactive=False))
	            return

	# ---------------------------------------------------------------------------
	# UI
	# ---------------------------------------------------------------------------
	def run_ui():
	"""Build the Gradio Blocks UI and launch the server.

	Creates five tabs (Generator, Settings, Logs, About Me, Showcase), wires the
	Start/End buttons to ``wrapper`` and the shared ``stream``, and finally calls
	``launch`` with the host, port, share and inbrowser values from ``args``.
	"""
	# Progress-bar CSS from the helper, followed by the dark theme and info-card styles.
	css = make_progress_bar_css() + """
	body,.gradio-container{background:#111821;color:#eee;font-family:Roboto,Arial,sans-serif}
	.gr-button,.gr-button-primary{background:#006dff;border:#006dff;font-size:16px;padding:10px 22px}
	.gr-button-primary.save{background:#28a745;border:#28a745}
	.gr-button-primary.save:hover{background:#3ec06d;border:#3ec06d}
	.gr-button:hover,.gr-button-primary:hover{background:#0099ff;border:#0099ff}
	input,textarea,.gr-input,.gr-textbox,.gr-slider,.gr-number{background:#1e1e1e;color:#eee;border-color:#006dff}
	.info-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(320px,1fr));gap:24px;margin-top:20px}
	.info-card{background:#0d1b2a;padding:28px;border:2px solid #0099ff;border-radius:14px;box-shadow:0 0 18px #000a}
	.info-card h2{margin:0 0 16px 0;color:#00e6ff;font-size:26px}
	.info-card p{margin:0 0 18px 0;line-height:1.6}
	.info-card ul,ol{margin:0;padding-left:22px}
	.info-card li{margin-bottom:12px}
	.info-card a{color:#37c4ff;text-decoration:none;font-weight:600}
	.info-card a:hover{text-decoration:underline;color:#6fe0ff}
	"""
	# Sample rows for the clickable prompt datasets (one column per row).
	positive_examples = [
	["A neon specter gliding through a cyberpunk cityscape at dusk."],
	["An astronaut exploring an alien jungle on Mars."],
	["A brave knight facing a dragon in a stormy courtyard."],
	["A futuristic city skyline at night with flying cars."]
	]
	negative_examples = [
	["low quality, blurry, watermark, distorted, deformed"]
	]
	blk = gr.Blocks(css=css).queue()
	with blk:
	gr.Markdown("## 👻 GhostPack – Phantom Float Math Edition")
	with gr.Tabs():
	with gr.TabItem("🎛️ Generator"):
	with gr.Row():
	with gr.Column(scale=6):
	# Input image is preloaded with the demo picture when one was found at start-up.
	img = gr.Image(sources=["upload"], type="numpy", label="Input Image", height=360, value=demo_np)
	prm = gr.Textbox(label="Prompt", placeholder="Enter positive prompt")
	ds_pos = gr.Dataset(samples=positive_examples, label="🎨 Quick Prompts", components=[prm])
	# Clicking a sample copies its first column into the prompt box.
	ds_pos.click(lambda x: x[0], inputs=[ds_pos], outputs=[prm])
	npr = gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt")
	ds_neg = gr.Dataset(samples=negative_examples, label="🚫 Neg Prompts", components=[npr])
	ds_neg.click(lambda x: x[0], inputs=[ds_neg], outputs=[npr])
	# A seed of -1 makes the worker pick a random seed.
	se = gr.Number(label="Seed", value=-1, precision=0, minimum=-1)
	sec = gr.Slider(label="Length (s)", minimum=1, maximum=120, value=5, step=0.1)
	win = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
	stp = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
	cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
	# NOTE(review): the initial value reads settings["cfg_scale"], the same key as the
	# CFG Scale slider above - confirm that sharing one setting is intended.
	gsc = gr.Slider(label="Distilled CFG", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
	rsc = gr.Slider(label="Guidance Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
	keep = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
	tea = gr.Checkbox(label="Use TeaCache", value=settings["enable_teacache"])
	crf = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
	b_go, b_end = gr.Button("Start 🚀"), gr.Button("End", interactive=False)
	with gr.Column(scale=5):
	vid = gr.Video(label="Output Video", autoplay=True, height=540, loop=True)
	log = gr.Markdown()
	# Order matches the positional parameters of wrapper_logic / worker.
	inputs = [img, prm, npr, se, sec, win, stp, cfg, gsc, rsc, keep, tea, crf]
	# The seven outputs line up with the 7-tuples yielded by wrapper_logic; the preview
	# image, description and progress-bar components are created inline here.
	b_go.click(fn=wrapper, inputs=inputs, outputs=[vid, gr.Image(visible=False), gr.Markdown(), gr.HTML(), log, b_go, b_end])
	# Requests cancellation: the worker checks stream.input_queue.top() for "end".
	b_end.click(fn=lambda: stream.input_queue.push("end"))
	with gr.TabItem("⚙️ Settings"):
	gr.Markdown("### ⚙️ Advanced Settings & Save")
	# NOTE(review): in this function these controls are only passed to save_settings;
	# they are not connected to the Generator tab's inputs.
	lw = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
	cs = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
	cr = gr.Slider(label="CFG Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
	gk = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
	te_s = gr.Checkbox(label="Enable TeaCache", value=settings["enable_teacache"])
	crf_s= gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
	save_btn = gr.Button("Save Settings ✅", elem_classes="save")
	save_status = gr.Markdown("")
	save_btn.click(fn=save_settings, inputs=[lw, cs, cr, gk, te_s, crf_s], outputs=[save_status])
	with gr.TabItem("📝 Logs"):
	gr.Markdown("### 📝 GhostPack Logs")
	# The log text is read once while the UI is built; the button re-reads the file.
	log_area = gr.Textbox(label="Logs", lines=20, interactive=False, value=load_logs())
	refresh_btn = gr.Button("🔄 Refresh Logs")
	refresh_btn.click(fn=load_logs, inputs=None, outputs=[log_area])
	with gr.TabItem("ℹ️ About Me"):
	gr.HTML("""
	<div class="info-grid">
	<div class="info-card">
	<h2>👻 GhostAI: AI Media Innovator</h2>
	<p>I'm a DevOps AI engineer specializing in autonomous media pipelines. My passion is crafting cutting-edge AI tools for video, audio, and automation.</p>
	<p><b>Mission:</b> Empower creators with fast, innovative AI solutions.<br>
	<b>Projects:</b> GhostPack Video Generator, GhostAI Music Generator.<br>
	<b>Vision:</b> Redefine media creation with AI-driven precision.</p>
	<p>
	🔗 <a href="https://huggingface.co/ghostai1">HuggingFace Profile</a><br>
	✉️ <a href="mailto:ghostai@example.com">Contact Me</a>
	</p>
	</div>
	</div>
	""")
	with gr.TabItem("📸 Showcase"):
	gr.HTML("""
	<div class="info-grid">
	<div class="info-card">
	<h2>✨ GhostPack Showcase</h2>
	<p>Experience cinematic AI video creation with GhostPack, powered by advanced neural networks for fluid motion and stunning visuals. From cyberpunk cityscapes to alien jungles, GhostPack brings your imagination to life. Inspired by the success of the GhostAI Music Generator, this tool is your gateway to next-gen media creation.</p>
	<div align="center">
	<img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/2RH49llUgKsmWY7Hu8yBD.gif"
	alt="GhostPack Animated Banner"
	style="width:920px;height:260px;max-width:100%;border-radius:18px;box-shadow:0 0 48px #00ffcc;margin-bottom:36px;">
	<p><b>GhostPack in Action:</b> Dynamic video generation with phantom-like precision.</p>
	</div>
	<div align="center">
	<img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/k8pgUlg4OvdUZpbMNTcp5.gif"
	alt="GhostPack Demo GIF"
	style="width:470px;height:auto;border-radius:18px;box-shadow:0 0 32px #ff00ff;margin-bottom:28px;">
	<p><b>Demo:</b> Create videos from a single image and prompt.</p>
	</div>
	<div style="display:flex;justify-content:center;gap:28px;">
	<div>
	<img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/7ABE2lOA4LOUtPfh1mhxP.png"
	alt="Main Interface"
	style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
	<p><b>Main Interface:</b> Intuitive controls for video generation.</p>
	</div>
	<div>
	<img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/9qNVRX2eM2iCY8xQKcOwW.png"
	alt="Advanced Settings"
	style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
	<p><b>Settings:</b> Fine-tune for optimal performance.</p>
	</div>
	<div>
	<img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/--fIS9ITg4-VqN22ySoa2.png"
	alt="Logs Display"
	style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
	<p><b>Logs:</b> Monitor VRAM and generation progress.</p>
	</div>
	</div>
	</div>
	</div>
	""")
	# Start the server with the values parsed from the command line.
	blk.launch(server_name=args.server, server_port=args.port, share=args.share, inbrowser=args.inbrowser)

	# Build and launch the UI only when this file is executed as a script.
	if __name__ == "__main__":
	run_ui()