ghostai1 committed
Commit 6e6a051 · verified · 1 Parent(s): 7d79c17

Update app.py

Files changed (1): app.py +256 -125

app.py CHANGED
@@ -2,7 +2,7 @@
  # ---------------------------------------------------------------------------
  # RELEASE – GhostPack Video Generator (📸 Showcase Tab Polished)
  # ---------------------------------------------------------------------------
- import os, sys, argparse, random, traceback
+ import os, sys, argparse, random, traceback, json, logging
  import numpy as np, torch, einops, gradio as gr
  from PIL import Image
  from diffusers import AutoencoderKLHunyuanVideo
@@ -31,9 +31,67 @@ from diffusers_helper.clip_vision import hf_clip_vision_encode
  from diffusers_helper.bucket_tools import find_nearest_bucket
 
  # ---------------------------------------------------------------------------
- # ENV / CACHE
+ # SETUP LOGGING & TCMALLOC DETECTION
  # ---------------------------------------------------------------------------
  BASE = os.path.abspath(os.path.dirname(__file__))
+ LOG_PATH = os.path.join(BASE, "ghostpack.log")
+ logging.basicConfig(
+     level=logging.INFO,
+     format='[%(asctime)s] %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler(LOG_PATH), logging.StreamHandler(sys.stdout)]
+ )
+ logger = logging.getLogger(__name__)
+
+ def detect_tcmalloc():
+     try:
+         with open('/proc/self/maps', 'r') as f:
+             return 'tcmalloc' in f.read()
+     except Exception:
+         return False
+
+ logger.info(f"TCMalloc loaded: {detect_tcmalloc()}")
+
+ # ---------------------------------------------------------------------------
+ # SETTINGS
+ # ---------------------------------------------------------------------------
+ SETTINGS_PATH = os.path.join(BASE, "settings.json")
+ default_settings = {
+     "latent_window": 9,
+     "cfg_scale": 1.0,
+     "cfg_rescale": 0.0,
+     "gpu_keep": 6.0,
+     "enable_teacache": True,
+     "mp4_crf": 16
+ }
+ try:
+     with open(SETTINGS_PATH, "r") as f:
+         loaded = json.load(f)
+     settings = loaded if isinstance(loaded, dict) else default_settings
+ except (OSError, json.JSONDecodeError):
+     settings = default_settings
+
+ def save_settings(lw, cs, cr, gk, teac, crf):
+     s = {
+         "latent_window": lw,
+         "cfg_scale": cs,
+         "cfg_rescale": cr,
+         "gpu_keep": gk,
+         "enable_teacache": teac,
+         "mp4_crf": crf
+     }
+     with open(SETTINGS_PATH, "w") as f:
+         json.dump(s, f, indent=2)
+     return "✅ Settings saved!"
+
+ def load_logs():
+     if os.path.exists(LOG_PATH):
+         with open(LOG_PATH, "r") as f:
+             return f.read()
+     return "No logs found."
+
+ # ---------------------------------------------------------------------------
+ # ENV / CACHE
+ # ---------------------------------------------------------------------------
  CACHE = os.path.join(BASE, "hf_download")
  os.makedirs(CACHE, exist_ok=True)
  for _v in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE"):
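The settings block added in this hunk persists UI defaults to a settings.json next to app.py. A minimal sketch of the same round-trip under a temp directory (the path here is illustrative, not the app's real location):

    import json, os, tempfile

    defaults = {"latent_window": 9, "cfg_scale": 1.0, "cfg_rescale": 0.0,
                "gpu_keep": 6.0, "enable_teacache": True, "mp4_crf": 16}
    path = os.path.join(tempfile.gettempdir(), "settings.json")  # illustrative path

    with open(path, "w") as f:          # what save_settings() writes
        json.dump(defaults, f, indent=2)

    try:                                # what the module-level loader does
        with open(path) as f:
            loaded = json.load(f)
        settings = loaded if isinstance(loaded, dict) else defaults
    except (OSError, json.JSONDecodeError):
        settings = defaults             # missing or malformed file falls back

    assert settings["mp4_crf"] == 16

Catching only OSError and JSONDecodeError (rather than a bare except) keeps Ctrl-C and real bugs visible while still tolerating a corrupt settings file.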
@@ -55,32 +113,56 @@ args = ap.parse_args()
  # ---------------------------------------------------------------------------
  free_gb = get_cuda_free_memory_gb(gpu)
  hi_vram = free_gb > 60
- print(f"[GhostPack] Free VRAM: {free_gb:.1f} GB | High-VRAM: {hi_vram}")
+ logger.info(f"[GhostPack] Free VRAM: {free_gb:.1f} GB | High-VRAM: {hi_vram}")
 
  # ---------------------------------------------------------------------------
  # MODEL LOAD
  # ---------------------------------------------------------------------------
- def llm(sf): return LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder=sf, torch_dtype=torch.float16).cpu().eval()
- def clip(sf): return CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder=sf, torch_dtype=torch.float16).cpu().eval()
+ def llm(sf):
+     return LlamaModel.from_pretrained(
+         "hunyuanvideo-community/HunyuanVideo",
+         subfolder=sf, torch_dtype=torch.float16
+     ).cpu().eval()
+
+ def clip(sf):
+     return CLIPTextModel.from_pretrained(
+         "hunyuanvideo-community/HunyuanVideo",
+         subfolder=sf, torch_dtype=torch.float16
+     ).cpu().eval()
 
  text_enc = llm("text_encoder")
  text_enc2 = clip("text_encoder_2")
- tok = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer")
- tok2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2")
- vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16).cpu().eval()
- feat_ext = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="feature_extractor")
- img_enc = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=torch.float16).cpu().eval()
- trans = HunyuanVideoTransformer3DModelPacked.from_pretrained("lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16).cpu().eval()
+ tok = LlamaTokenizerFast.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer"
+ )
+ tok2 = CLIPTokenizer.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2"
+ )
+ vae = AutoencoderKLHunyuanVideo.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16
+ ).cpu().eval()
+ feat_ext = SiglipImageProcessor.from_pretrained(
+     "lllyasviel/flux_redux_bfl", subfolder="feature_extractor"
+ )
+ img_enc = SiglipVisionModel.from_pretrained(
+     "lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=torch.float16
+ ).cpu().eval()
+ trans = HunyuanVideoTransformer3DModelPacked.from_pretrained(
+     "lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16
+ ).cpu().eval()
  trans.high_quality_fp32_output_for_inference = True
 
  if not hi_vram:
      vae.enable_slicing(); vae.enable_tiling()
  else:
-     for _m in (text_enc, text_enc2, img_enc, vae, trans): _m.to(gpu)
+     for m in (text_enc, text_enc2, img_enc, vae, trans):
+         m.to(gpu)
 
  trans.to(dtype=torch.bfloat16)
- for _m in (vae, img_enc, text_enc, text_enc2): _m.to(dtype=torch.float16)
- for _m in (vae, img_enc, text_enc, text_enc2, trans): _m.requires_grad_(False)
+ for m in (vae, img_enc, text_enc, text_enc2):
+     m.to(dtype=torch.float16)
+ for m in (vae, img_enc, text_enc, text_enc2, trans):
+     m.requires_grad_(False)
  if not hi_vram:
      DynamicSwapInstaller.install_model(trans, device=gpu)
      DynamicSwapInstaller.install_model(text_enc, device=gpu)
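The hi_vram gate above picks between two memory strategies: with more than 60 GB free, every model stays resident on the GPU; otherwise models live on CPU in fp16 and are swapped in on demand. A minimal sketch of the free-memory probe, assuming get_cuda_free_memory_gb() in diffusers_helper behaves like a wrapper over torch.cuda.mem_get_info():

    import torch

    def free_memory_gb(device=None):
        # torch.cuda.mem_get_info returns (free_bytes, total_bytes)
        free_bytes, _total = torch.cuda.mem_get_info(device)
        return free_bytes / (1024 ** 3)

    if torch.cuda.is_available():
        hi_vram = free_memory_gb() > 60   # same threshold as the app uses
        print(f"hi_vram={hi_vram}")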
@@ -93,10 +175,11 @@ stream = AsyncStream()
  # DEMO IMAGE
  # ---------------------------------------------------------------------------
  demo_path = os.path.join(BASE, "img", "demo.png")
- demo_np = np.array(Image.open(demo_path).convert("RGB")) if os.path.exists(demo_path) else None
+ demo_np = np.array(Image.open(demo_path).convert("RGB")) \
+     if os.path.exists(demo_path) else None
 
  # ---------------------------------------------------------------------------
- # WORKER
+ # WORKER & WRAPPER
  # ---------------------------------------------------------------------------
  @torch.no_grad()
  def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
@@ -106,12 +189,13 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
      job = generate_timestamp()
      stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "Start"))))
      try:
-         if not hi_vram: unload_complete_models(text_enc, text_enc2, img_enc, vae, trans)
          if not hi_vram:
+             unload_complete_models(text_enc, text_enc2, img_enc, vae, trans)
              fake_diffusers_current_device(text_enc, gpu)
              load_model_as_complete(text_enc2, gpu)
-         lv, cp = encode_prompt_conds(p, text_enc, text_enc2, tok, tok2)
-         lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) if cfg == 1 else encode_prompt_conds(n_p, text_enc, text_enc2, tok, tok2)
+         lv, cp = encode_prompt_conds(p, text_enc, text_enc2, tok, tok2)
+         lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) \
+             if cfg == 1 else encode_prompt_conds(n_p, text_enc, text_enc2, tok, tok2)
          lv, m = crop_or_pad_yield_mask(lv, 512)
          lv_n, m_n = crop_or_pad_yield_mask(lv_n, 512)
          H, W, _ = img.shape
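Note the cfg == 1 shortcut above: with a real guidance scale of 1, the classifier-free-guidance mix reduces to the conditional prediction alone, so the negative prompt is never encoded and zero tensors of matching shape stand in. A sketch with illustrative shapes only:

    import torch

    lv = torch.randn(1, 512, 4096)   # hypothetical prompt-embedding shape
    cp = torch.randn(1, 768)         # hypothetical pooled-embedding shape
    cfg = 1                          # real_guidance_scale

    # At scale 1 the unconditional term cancels out of the CFG formula,
    # so zero embeddings are a safe stand-in and the second encode is skipped.
    lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) if cfg == 1 \
        else (lv, cp)  # placeholder branch: the real code encodes n_p here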
@@ -123,40 +207,38 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
          if not hi_vram: load_model_as_complete(vae, gpu)
          start_lat = vae_encode(img_pt, vae)
          if not hi_vram: load_model_as_complete(img_enc, gpu)
-         img_hidden = hf_clip_vision_encode(img_np, feat_ext, img_enc).last_hidden_state
+         img_hidden = hf_clip_vision_encode(img_np, feat_ext, img_enc).last_hidden_state
          to = trans.dtype
-         lv, lv_n, cp, cp_n, img_hidden = (x.to(to) for x in (lv, lv_n, cp, cp_n, img_hidden))
+         lv, lv_n, cp, cp_n, img_hidden = [x.to(to) for x in (lv, lv_n, cp, cp_n, img_hidden)]
          gen = torch.Generator("cpu").manual_seed(sd)
-         frames = win * 4 - 3
-         hist_lat = torch.zeros((1, 16, 19, h // 8, w // 8), dtype=torch.float32).cpu()
+         frames = win * 4 - 3
+         hist_lat = torch.zeros((1,16,19,h//8,w//8), dtype=torch.float32).cpu()
          hist_px, total = None, 0
-         pad_seq = [3] + [2] * (sections - 3) + [1, 0] if sections > 4 else list(reversed(range(sections)))
+         pad_seq = [3] + [2]*(sections-3) + [1,0] if sections>4 else list(reversed(range(sections)))
          for pad in pad_seq:
-             last = pad == 0
+             last = (pad == 0)
              if stream.input_queue.top() == "end":
-                 stream.output_queue.push(("end", None))
-                 return
+                 stream.output_queue.push(("end", None)); return
              pad_sz = pad * win
-             idx = torch.arange(0, sum([1, pad_sz, win, 1, 2, 16])).unsqueeze(0)
-             a, b, c, d, e, f = idx.split([1, pad_sz, win, 1, 2, 16], 1)
-             clean_idx = torch.cat([a, d], 1)
+             idx = torch.arange(0, sum([1,pad_sz,win,1,2,16])).unsqueeze(0)
+             a,b,c,d,e,f = idx.split([1,pad_sz,win,1,2,16],1)
+             clean_idx = torch.cat([a,d],1)
              pre = start_lat.to(hist_lat)
-             post, two, four = hist_lat[:, :, :19].split([1, 2, 16], 2)
-             clean = torch.cat([pre, post], 2)
+             post,two,four = hist_lat[:,:,:19].split([1,2,16],2)
+             clean = torch.cat([pre,post],2)
              if not hi_vram:
                  unload_complete_models()
                  move_model_to_device_with_memory_preservation(trans, gpu, keep)
              trans.initialize_teacache(tea, stp)
              def cb(d):
                  pv = vae_decode_fake(d["denoised"])
-                 pv = (pv * 255).cpu().numpy().clip(0, 255).astype(np.uint8)
+                 pv = (pv*255).cpu().numpy().clip(0,255).astype(np.uint8)
                  pv = einops.rearrange(pv, "b c t h w->(b h)(t w)c")
                  cur = d["i"] + 1
-                 stream.output_queue.push(("progress", (pv, f"{total * 4 - 3}f",
-                     make_progress_bar_html(int(100 * cur / stp), f"{cur}/{stp}"))))
+                 stream.output_queue.push(("progress", (pv, f"{total*4-3}f",
+                     make_progress_bar_html(int(100*cur/stp), f"{cur}/{stp}"))))
                  if stream.input_queue.top() == "end":
-                     stream.output_queue.push(("end", None))
-                     raise KeyboardInterrupt
+                     stream.output_queue.push(("end", None)); raise KeyboardInterrupt
              new_lat = sample_hunyuan(
                  transformer=trans, sampler="unipc", width=w, height=h, frames=frames,
                  real_guidance_scale=cfg, distilled_guidance_scale=gsc, guidance_rescale=rsc,
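The scheduling arithmetic in this hunk is worth tracing: each latent window of win frames decodes to win * 4 - 3 output frames, and clips longer than four sections use the padded sequence [3, 2, ..., 2, 1, 0] rather than a plain countdown, with pad == 0 marking the final, image-anchored section. A worked example of exactly these expressions:

    win = 9
    frames = win * 4 - 3   # 33 frames sampled per section

    for sections in (3, 4, 7):
        pad_seq = [3] + [2] * (sections - 3) + [1, 0] if sections > 4 \
            else list(reversed(range(sections)))
        print(sections, pad_seq)

    # 3 -> [2, 1, 0]
    # 4 -> [3, 2, 1, 0]               (plain countdown: sections <= 4)
    # 7 -> [3, 2, 2, 2, 2, 1, 0]      (len == sections, ends at 0 == last)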
@@ -165,50 +247,67 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
                  negative_prompt_embeds=lv_n, negative_prompt_embeds_mask=m_n, negative_prompt_poolers=cp_n,
                  device=gpu, dtype=torch.bfloat16, image_embeddings=img_hidden,
                  latent_indices=c, clean_latents=clean, clean_latent_indices=clean_idx,
-                 clean_latents_2x=two, clean_latent_2x_indices=e, clean_latents_4x=four, clean_latent_4x_indices=f, callback=cb)
-             if last: new_lat = torch.cat([start_lat.to(new_lat), new_lat], 2)
-             total += new_lat.shape[2]; hist_lat = torch.cat([new_lat.to(hist_lat), hist_lat], 2)
+                 clean_latents_2x=two, clean_latent_2x_indices=e,
+                 clean_latents_4x=four, clean_latent_4x_indices=f,
+                 callback=cb
+             )
+             if last:
+                 new_lat = torch.cat([start_lat.to(new_lat), new_lat], 2)
+             total += new_lat.shape[2]
+             hist_lat = torch.cat([new_lat.to(hist_lat), hist_lat], 2)
              if not hi_vram:
                  offload_model_from_device_for_memory_preservation(trans, gpu, 8)
                  load_model_as_complete(vae, gpu)
-             real = hist_lat[:, :, :total]
-             if hist_px is None: hist_px = vae_decode(real, vae).cpu()
+             real = hist_lat[:,:,:total]
+             if hist_px is None:
+                 hist_px = vae_decode(real, vae).cpu()
              else:
-                 sec_lat = win * 2 + 1 if last else win * 2
-                 cur_px = vae_decode(real[:, :, :sec_lat], vae).cpu()
-                 hist_px = soft_append_bcthw(cur_px, hist_px, win * 4 - 3)
-             if not hi_vram: unload_complete_models()
+                 sec_lat = win*2 + 1 if last else win*2
+                 cur_px = vae_decode(real[:,:,:sec_lat], vae).cpu()
+                 hist_px = soft_append_bcthw(cur_px, hist_px, win*4-3)
+             if not hi_vram:
+                 unload_complete_models()
              mp4 = os.path.join(OUT, f"{job}_{total}.mp4")
              save_bcthw_as_mp4(hist_px, mp4, fps=30, crf=crf)
              stream.output_queue.push(("file", mp4))
              if last: break
      except Exception:
-         traceback.print_exc(); stream.output_queue.push(("end", None))
+         traceback.print_exc()
+         stream.output_queue.push(("end", None))
 
- def wrapper(*args): yield from wrapper_logic(*args)
+ def wrapper(*args):
+     yield from wrapper_logic(*args)
+
  def wrapper_logic(img,p,n_p,sd,secs,win,stp,cfg,gsc,rsc,keep,tea,crf):
      global stream
-     if img is None: raise gr.Error("Upload an image.")
+     if img is None:
+         raise gr.Error("Upload an image.")
      yield None,None,"","","",gr.update(interactive=False),gr.update(interactive=True)
-     stream = AsyncStream(); async_run(worker,img,p,n_p,sd,secs,win,stp,cfg,gsc,rsc,keep,tea,crf)
-     out,log=None,""
+     stream = AsyncStream()
+     async_run(worker, img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf)
+     out, log = None, ""
      while True:
-         flag,data = stream.output_queue.next()
-         if flag=="file":
-             out = data;yield out,gr.update(),gr.update(),gr.update(),log,gr.update(interactive=False),gr.update(interactive=True)
-         if flag=="progress":
-             pv,desc,html=data; log=desc
-             yield gr.update(),gr.update(visible=True,value=pv),desc,html,log,gr.update(interactive=False),gr.update(interactive=True)
-         if flag=="end":
-             yield out,gr.update(visible=False),gr.update(),"",log,gr.update(interactive=True),gr.update(interactive=False); break
+         flag, data = stream.output_queue.next()
+         if flag == "file":
+             out = data
+             yield out, gr.update(), gr.update(), gr.update(), log, gr.update(interactive=False), gr.update(interactive=True)
+         if flag == "progress":
+             pv, desc, html = data
+             log = desc
+             yield gr.update(), gr.update(visible=True, value=pv), desc, html, log, gr.update(interactive=False), gr.update(interactive=True)
+         if flag == "end":
+             yield out, gr.update(visible=False), gr.update(), "", log, gr.update(interactive=True), gr.update(interactive=False)
+             break
 
  # ---------------------------------------------------------------------------
  # UI
  # ---------------------------------------------------------------------------
  def run_ui():
-     css = make_progress_bar_css()+"""
+     css = make_progress_bar_css() + """
  body,.gradio-container{background:#111821;color:#eee;font-family:Roboto,Arial,sans-serif}
  .gr-button,.gr-button-primary{background:#006dff;border:#006dff;font-size:16px;padding:10px 22px}
+ .gr-button-primary.save{background:#28a745;border:#28a745}
+ .gr-button-primary.save:hover{background:#3ec06d;border:#3ec06d}
  .gr-button:hover,.gr-button-primary:hover{background:#0099ff;border:#0099ff}
  input,textarea,.gr-input,.gr-textbox,.gr-slider,.gr-number{background:#1e1e1e;color:#eee;border-color:#006dff}
  .info-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(320px,1fr));gap:24px;margin-top:20px}
@@ -220,88 +319,120 @@ def run_ui():
  .info-card a{color:#37c4ff;text-decoration:none;font-weight:600}
  .info-card a:hover{text-decoration:underline;color:#6fe0ff}
  """
- quick=[["The girl dances gracefully, with clear movements, full of charm."],
-        ["A character doing some simple body movements."]]
- blk=gr.Blocks(css=css).queue()
+ positive_examples = [
+     ["A neon specter gliding through a cyberpunk cityscape at dusk."],
+     ["An astronaut exploring an alien jungle on Mars."],
+     ["A brave knight facing a dragon in a stormy courtyard."],
+     ["A futuristic city skyline at night with flying cars."]
+ ]
+ negative_examples = [
+     ["low quality, blurry, watermark, distorted, deformed"]
+ ]
+ blk = gr.Blocks(css=css).queue()
  with blk:
      gr.Markdown("## 👻 **GhostPack – Phantom Float Math Edition**")
      with gr.Tabs():
-         # ------------- Generator TAB -------------
          with gr.TabItem("🎛️ Generator"):
              with gr.Row():
                  with gr.Column(scale=6):
-                     img = gr.Image(sources=["upload"], type="numpy", label="Image", height=360, value=demo_np)
-                     prm = gr.Textbox(label="Prompt")
-                     ds = gr.Dataset(samples=quick, label="Quick List", components=[prm])
-                     ds.click(lambda x: x[0], inputs=[ds], outputs=prm)
-                     with gr.Row():
-                         b_go = gr.Button("Start")
-                         b_end = gr.Button("End", interactive=False)
-                     with gr.Group():
-                         tea = gr.Checkbox(label="Use TeaCache", value=True)
-                         npr = gr.Textbox(label="Negative Prompt", visible=False)
-                         se = gr.Number(label="Seed (-1 ⇒ random)", value=-1, precision=0, minimum=-1)
-                         rand = gr.Button("🎲 Random"); rand.click(lambda: -1, None, se)
-                         sec = gr.Slider(label="Video Length (s)", minimum=1, maximum=120, value=5, step=0.1)
-                         win = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=9, step=1, visible=False)
-                         stp = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
-                         cfg = gr.Slider(label="CFG", minimum=1, maximum=32, value=1, step=0.01, visible=False)
-                         gsc = gr.Slider(label="Distilled CFG", minimum=1, maximum=32, value=10, step=0.01)
-                         rsc = gr.Slider(label="CFG Re-Scale", minimum=0, maximum=1, value=0, step=0.01, visible=False)
-                         kee = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=6, step=0.1)
-                         crf = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=16, step=1)
+                     img = gr.Image(sources=["upload"], type="numpy", label="Input Image", height=360, value=demo_np)
+                     prm = gr.Textbox(label="Prompt", placeholder="Enter positive prompt")
+                     ds_pos = gr.Dataset(samples=positive_examples, label="🎨 Quick Prompts", components=[prm])
+                     ds_pos.click(lambda x: x[0], inputs=[ds_pos], outputs=[prm])
+                     npr = gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt")
+                     ds_neg = gr.Dataset(samples=negative_examples, label="🚫 Neg Prompts", components=[npr])
+                     ds_neg.click(lambda x: x[0], inputs=[ds_neg], outputs=[npr])
+                     se = gr.Number(label="Seed", value=-1, precision=0, minimum=-1)
+                     sec = gr.Slider(label="Length (s)", minimum=1, maximum=120, value=5, step=0.1)
+                     win = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
+                     stp = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
+                     cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+                     gsc = gr.Slider(label="Distilled CFG", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+                     rsc = gr.Slider(label="Guidance Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
+                     keep = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
+                     tea = gr.Checkbox(label="Use TeaCache", value=settings["enable_teacache"])
+                     crf = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
+                     b_go, b_end = gr.Button("Start 🚀"), gr.Button("End", interactive=False)
                  with gr.Column(scale=5):
-                     vid = gr.Video(label="Finished", autoplay=True, height=540, loop=True, show_share_button=False)
-                     pv = gr.Image(label="Progress Latents", height=220, visible=False, interactive=False)
-                     dsc = gr.Markdown(""); bar = gr.HTML(""); log = gr.Markdown("")
-             inputs=[img,prm,npr,se,sec,win,stp,cfg,gsc,rsc,kee,tea,crf]
-             b_go.click(fn=wrapper,inputs=inputs,outputs=[vid,pv,dsc,bar,log,b_go,b_end])
-             b_end.click(fn=lambda:stream.input_queue.push("end"))
-         # ------------- Build Info TAB -------------
-         with gr.TabItem("📜 Docs"):
+                     vid = gr.Video(label="Output Video", autoplay=True, height=540, loop=True)
+                     log = gr.Markdown()
+             inputs = [img, prm, npr, se, sec, win, stp, cfg, gsc, rsc, keep, tea, crf]
+             b_go.click(fn=wrapper, inputs=inputs, outputs=[vid, gr.Image(visible=False), gr.Markdown(), gr.HTML(), log, b_go, b_end])
+             b_end.click(fn=lambda: stream.input_queue.push("end"))
+         with gr.TabItem("⚙️ Settings"):
+             gr.Markdown("### ⚙️ Advanced Settings & Save")
+             lw = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
+             cs = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+             cr = gr.Slider(label="CFG Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
+             gk = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
+             te_s = gr.Checkbox(label="Enable TeaCache", value=settings["enable_teacache"])
+             crf_s = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
+             save_btn = gr.Button("Save Settings ✅", elem_classes="save")
+             save_status = gr.Markdown("")
+             save_btn.click(fn=save_settings, inputs=[lw, cs, cr, gk, te_s, crf_s], outputs=[save_status])
+         with gr.TabItem("📝 Logs"):
+             gr.Markdown("### 📝 GhostPack Logs")
+             log_area = gr.Textbox(label="Logs", lines=20, interactive=False, value=load_logs())
+             refresh_btn = gr.Button("🔄 Refresh Logs")
+             refresh_btn.click(fn=load_logs, inputs=None, outputs=[log_area])
+         with gr.TabItem("ℹ️ About Me"):
              gr.HTML("""
              <div class="info-grid">
-             <div class="info-card"><h2>👀 About GHOSTAI</h2><p>DevOps engineer crafting autonomous media pipelines.<br><b>Motto:</b> "Ship fast, break norms, wow creators."</p></div>
-             <div class="info-card"><h2>⚙️ Tech &amp; Tuning</h2><p>GhostPack is a <strong>Phantom Float Math</strong> fork of FramePack + Hunyuan.</p>
-             <ul><li><b>Seed</b>: −1 random</li><li><b>Steps</b>: 15-40</li><li><b>Distilled CFG</b>: 7-12</li><li><b>CRF</b>: 0-23 HQ</li></ul></div>
-             <div class="info-card"><h2>🛠️ Quick-Start</h2>
-             <ol><li>Upload or keep demo ghost.</li><li>Type vivid prompt.</li><li>Choose 8-10 s length.</li><li>Click <b>Start</b>.</li><li>Tweak CFG &amp; Steps.</li></ol></div>
-             <div class="info-card"><h2>🚀 Projects</h2>
-             <ul><li><a href="https://huggingface.co/spaces/ghostai1/GhostPack" target="_blank">GhostPack Space</a></li>
-             <li><a href="https://huggingface.co/ghostai1/GHOSTSONAFB" target="_blank">GhostSona Music (soon)</a></li>
-             <li><a href="https://huggingface.co/spaces/ghostai1/GhostPack/discussions" target="_blank">Community Forum</a></li></ul></div>
-             <div class="info-card"><h2>🌐 Connect</h2><ul><li><a href="https://huggingface.co/ghostai1" target="_blank">HuggingFace Profile</a></li></ul></div>
+             <div class="info-card">
+                 <h2>👻 GhostAI: AI Media Innovator</h2>
+                 <p>I'm a DevOps AI engineer specializing in autonomous media pipelines. My passion is crafting cutting-edge AI tools for video, audio, and automation.</p>
+                 <p><b>Mission:</b> Empower creators with fast, innovative AI solutions.<br>
+                 <b>Projects:</b> GhostPack Video Generator, GhostAI Music Generator.<br>
+                 <b>Vision:</b> Redefine media creation with AI-driven precision.</p>
+                 <p>
+                 🔗 <a href="https://huggingface.co/ghostai1">HuggingFace Profile</a><br>
+                 ✉️ <a href="mailto:ghostai@example.com">Contact Me</a>
+                 </p>
+             </div>
              </div>
              """)
-         # ------------- SHOWCASE TAB -------------
          with gr.TabItem("📸 Showcase"):
-             gr.HTML(f"""
-             <div style="text-align:center">
-             <img src="file/{os.path.join('img','banner.gif')}" alt="GhostPack Banner" style="max-width:100%;border-radius:16px;box-shadow:0 0 32px #00ffcc;margin-bottom:28px">
-             </div>
+             gr.HTML("""
              <div class="info-grid">
              <div class="info-card">
-             <h2>🎬 Demo Clip</h2>
-             <video src="file/{os.path.join('img','example_demo.mp4')}" controls style="width:100%;border-radius:10px;box-shadow:0 0 18px #0099ff"></video>
-             <p>Neon-lit spectral battle generated entirely with GhostPack at 30 FPS.</p>
-             </div>
-             <div class="info-card">
-             <h2>📸 UI Screens</h2>
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/7ABE2lOA4LOUtPfh1mhxP.png" style="width:100%;border-radius:8px;margin-bottom:12px">
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/9qNVRX2eM2iCY8xQKcOwW.png" style="width:100%;border-radius:8px;margin-bottom:12px">
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/--fIS9ITg4-VqN22ySoa2.png" style="width:100%;border-radius:8px">
-             </div>
-             <div class="info-card">
-             <h2>🧭 Usage Tips</h2>
-             <ul>
-             <li><b>Cinematic 30 FPS:</b> keep <i>Latent Window</i> 9 for smooth motion.</li>
-             <li><b>Long Shots:</b> set <i>Video Length</i> 12 s + and <i>Steps</i> ≥ 35.</li>
-             <li><b>Quick Drafts:</b> enable TeaCache &amp; keep <i>Distilled CFG</i> ≤ 8.</li>
-             </ul>
+             <h2>✨ GhostPack Showcase</h2>
+             <p>Experience cinematic AI video creation with GhostPack, powered by advanced neural networks for fluid motion and stunning visuals. From cyberpunk cityscapes to alien jungles, GhostPack brings your imagination to life. Inspired by the success of the GhostAI Music Generator, this tool is your gateway to next-gen media creation.</p>
+             <div align="center">
+                 <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/2RH49llUgKsmWY7Hu8yBD.gif"
+                      alt="GhostPack Animated Banner"
+                      style="width:920px;height:260px;max-width:100%;border-radius:18px;box-shadow:0 0 48px #00ffcc;margin-bottom:36px;">
+                 <p><b>GhostPack in Action:</b> Dynamic video generation with phantom-like precision.</p>
+             </div>
+             <div align="center">
+                 <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/k8pgUlg4OvdUZpbMNTcp5.gif"
+                      alt="GhostPack Demo GIF"
+                      style="width:470px;height:auto;border-radius:18px;box-shadow:0 0 32px #ff00ff;margin-bottom:28px;">
+                 <p><b>Demo:</b> Create videos from a single image and prompt.</p>
+             </div>
+             <div style="display:flex;justify-content:center;gap:28px;">
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/7ABE2lOA4LOUtPfh1mhxP.png"
+                          alt="Main Interface"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Main Interface:</b> Intuitive controls for video generation.</p>
+                 </div>
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/9qNVRX2eM2iCY8xQKcOwW.png"
+                          alt="Advanced Settings"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Settings:</b> Fine-tune for optimal performance.</p>
+                 </div>
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/--fIS9ITg4-VqN22ySoa2.png"
+                          alt="Logs Display"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Logs:</b> Monitor VRAM and generation progress.</p>
+                 </div>
+             </div>
              </div>
              </div>
              """)
      blk.launch(server_name=args.server, server_port=args.port, share=args.share, inbrowser=args.inbrowser)
 
  if __name__ == "__main__":
-     run_ui()
+     run_ui()
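For readers new to the pattern used throughout this commit: worker() and wrapper_logic() communicate via message tuples on a queue, and the generator-based click handler turns each message into a live UI update. A self-contained sketch of the same architecture, with a plain queue.Queue standing in for AsyncStream (assumed semantics: ("progress", ...), ("file", path), ("end", None)):

    import queue, threading, time
    import gradio as gr

    def fake_worker(out_q):
        for i in range(3):
            time.sleep(0.5)
            out_q.put(("progress", f"step {i + 1}/3"))
        out_q.put(("file", "/tmp/demo.mp4"))   # illustrative path
        out_q.put(("end", None))

    def handler():
        out_q = queue.Queue()
        threading.Thread(target=fake_worker, args=(out_q,), daemon=True).start()
        while True:
            flag, data = out_q.get()           # mirrors stream.output_queue.next()
            if flag == "progress":
                yield data                     # each yield is one UI refresh
            elif flag == "file":
                yield f"saved {data}"
            elif flag == "end":
                yield "done"
                break

    with gr.Blocks() as demo:
        btn = gr.Button("Start")
        status = gr.Markdown()
        btn.click(fn=handler, outputs=[status])

    # demo.launch()

Running the worker on a background thread keeps the event handler free to yield, which is why the app can stream latent previews and partial MP4s while sampling is still in progress.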
 