ghostai1 committed
Commit 6e6a051 · verified · 1 Parent(s): 7d79c17

Update app.py

Files changed (1): app.py +256 -125

app.py CHANGED
@@ -2,7 +2,7 @@
  # ---------------------------------------------------------------------------
  # RELEASE – GhostPack Video Generator (📸 Showcase Tab Polished)
  # ---------------------------------------------------------------------------
- import os, sys, argparse, random, traceback
+ import os, sys, argparse, random, traceback, json, logging
  import numpy as np, torch, einops, gradio as gr
  from PIL import Image
  from diffusers import AutoencoderKLHunyuanVideo
@@ -31,9 +31,67 @@ from diffusers_helper.clip_vision import hf_clip_vision_encode
  from diffusers_helper.bucket_tools import find_nearest_bucket
 
  # ---------------------------------------------------------------------------
- # ENV / CACHE
+ # SETUP LOGGING & TCMALLOC DETECTION
  # ---------------------------------------------------------------------------
  BASE = os.path.abspath(os.path.dirname(__file__))
+ LOG_PATH = os.path.join(BASE, "ghostpack.log")
+ logging.basicConfig(
+     level=logging.INFO,
+     format='[%(asctime)s] %(levelname)s - %(message)s',
+     handlers=[logging.FileHandler(LOG_PATH), logging.StreamHandler(sys.stdout)]
+ )
+ logger = logging.getLogger(__name__)
+
+ def detect_tcmalloc():
+     try:
+         with open('/proc/self/maps', 'r') as f:
+             return 'tcmalloc' in f.read()
+     except Exception:
+         return False
+
+ logger.info(f"TCMalloc loaded: {detect_tcmalloc()}")
+
+ # ---------------------------------------------------------------------------
+ # SETTINGS
+ # ---------------------------------------------------------------------------
+ SETTINGS_PATH = os.path.join(BASE, "settings.json")
+ default_settings = {
+     "latent_window": 9,
+     "cfg_scale": 1.0,
+     "cfg_rescale": 0.0,
+     "gpu_keep": 6.0,
+     "enable_teacache": True,
+     "mp4_crf": 16
+ }
+ try:
+     with open(SETTINGS_PATH, "r") as f:
+         loaded = json.load(f)
+     settings = loaded if isinstance(loaded, dict) else default_settings
+ except (OSError, json.JSONDecodeError):
+     settings = default_settings
+
+ def save_settings(lw, cs, cr, gk, teac, crf):
+     s = {
+         "latent_window": lw,
+         "cfg_scale": cs,
+         "cfg_rescale": cr,
+         "gpu_keep": gk,
+         "enable_teacache": teac,
+         "mp4_crf": crf
+     }
+     with open(SETTINGS_PATH, "w") as f:
+         json.dump(s, f, indent=2)
+     return "✅ Settings saved!"
+
+ def load_logs():
+     if os.path.exists(LOG_PATH):
+         with open(LOG_PATH, "r") as f:
+             return f.read()
+     return "No logs found."
+
+ # ---------------------------------------------------------------------------
+ # ENV / CACHE
+ # ---------------------------------------------------------------------------
  CACHE = os.path.join(BASE, "hf_download")
  os.makedirs(CACHE, exist_ok=True)
  for _v in ("HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE"):
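The settings block added in this hunk persists UI defaults to a settings.json next to app.py. A minimal sketch of the same round-trip under a temp directory (the path here is illustrative, not the app's real location):

    import json, os, tempfile

    defaults = {"latent_window": 9, "cfg_scale": 1.0, "cfg_rescale": 0.0,
                "gpu_keep": 6.0, "enable_teacache": True, "mp4_crf": 16}
    path = os.path.join(tempfile.gettempdir(), "settings.json")  # illustrative path

    with open(path, "w") as f:          # what save_settings() writes
        json.dump(defaults, f, indent=2)

    try:                                # what the module-level loader does
        with open(path) as f:
            loaded = json.load(f)
        settings = loaded if isinstance(loaded, dict) else defaults
    except (OSError, json.JSONDecodeError):
        settings = defaults             # missing or malformed file falls back

    assert settings["mp4_crf"] == 16

Catching only OSError and JSONDecodeError (rather than a bare except) keeps Ctrl-C and real bugs visible while still tolerating a corrupt settings file.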
@@ -55,32 +113,56 @@ args = ap.parse_args()
  # ---------------------------------------------------------------------------
  free_gb = get_cuda_free_memory_gb(gpu)
  hi_vram = free_gb > 60
- print(f"[GhostPack] Free VRAM: {free_gb:.1f} GB | High-VRAM: {hi_vram}")
+ logger.info(f"[GhostPack] Free VRAM: {free_gb:.1f} GB | High-VRAM: {hi_vram}")
 
  # ---------------------------------------------------------------------------
  # MODEL LOAD
  # ---------------------------------------------------------------------------
- def llm(sf): return LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder=sf, torch_dtype=torch.float16).cpu().eval()
- def clip(sf): return CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder=sf, torch_dtype=torch.float16).cpu().eval()
+ def llm(sf):
+     return LlamaModel.from_pretrained(
+         "hunyuanvideo-community/HunyuanVideo",
+         subfolder=sf, torch_dtype=torch.float16
+     ).cpu().eval()
+
+ def clip(sf):
+     return CLIPTextModel.from_pretrained(
+         "hunyuanvideo-community/HunyuanVideo",
+         subfolder=sf, torch_dtype=torch.float16
+     ).cpu().eval()
 
  text_enc = llm("text_encoder")
  text_enc2 = clip("text_encoder_2")
- tok = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer")
- tok2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2")
- vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16).cpu().eval()
- feat_ext = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="feature_extractor")
- img_enc = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=torch.float16).cpu().eval()
- trans = HunyuanVideoTransformer3DModelPacked.from_pretrained("lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16).cpu().eval()
+ tok = LlamaTokenizerFast.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer"
+ )
+ tok2 = CLIPTokenizer.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="tokenizer_2"
+ )
+ vae = AutoencoderKLHunyuanVideo.from_pretrained(
+     "hunyuanvideo-community/HunyuanVideo", subfolder="vae", torch_dtype=torch.float16
+ ).cpu().eval()
+ feat_ext = SiglipImageProcessor.from_pretrained(
+     "lllyasviel/flux_redux_bfl", subfolder="feature_extractor"
+ )
+ img_enc = SiglipVisionModel.from_pretrained(
+     "lllyasviel/flux_redux_bfl", subfolder="image_encoder", torch_dtype=torch.float16
+ ).cpu().eval()
+ trans = HunyuanVideoTransformer3DModelPacked.from_pretrained(
+     "lllyasviel/FramePackI2V_HY", torch_dtype=torch.bfloat16
+ ).cpu().eval()
  trans.high_quality_fp32_output_for_inference = True
 
  if not hi_vram:
      vae.enable_slicing(); vae.enable_tiling()
  else:
-     for _m in (text_enc, text_enc2, img_enc, vae, trans): _m.to(gpu)
+     for m in (text_enc, text_enc2, img_enc, vae, trans):
+         m.to(gpu)
 
  trans.to(dtype=torch.bfloat16)
- for _m in (vae, img_enc, text_enc, text_enc2): _m.to(dtype=torch.float16)
- for _m in (vae, img_enc, text_enc, text_enc2, trans): _m.requires_grad_(False)
+ for m in (vae, img_enc, text_enc, text_enc2):
+     m.to(dtype=torch.float16)
+ for m in (vae, img_enc, text_enc, text_enc2, trans):
+     m.requires_grad_(False)
  if not hi_vram:
      DynamicSwapInstaller.install_model(trans, device=gpu)
      DynamicSwapInstaller.install_model(text_enc, device=gpu)
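The hi_vram gate above picks between two memory strategies: with more than 60 GB free, every model stays resident on the GPU; otherwise models live on CPU in fp16 and are swapped in on demand. A minimal sketch of the free-memory probe, assuming get_cuda_free_memory_gb() in diffusers_helper behaves like a wrapper over torch.cuda.mem_get_info():

    import torch

    def free_memory_gb(device=None):
        # torch.cuda.mem_get_info returns (free_bytes, total_bytes)
        free_bytes, _total = torch.cuda.mem_get_info(device)
        return free_bytes / (1024 ** 3)

    if torch.cuda.is_available():
        hi_vram = free_memory_gb() > 60   # same threshold as the app uses
        print(f"hi_vram={hi_vram}")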
@@ -93,10 +175,11 @@ stream = AsyncStream()
  # DEMO IMAGE
  # ---------------------------------------------------------------------------
  demo_path = os.path.join(BASE, "img", "demo.png")
- demo_np = np.array(Image.open(demo_path).convert("RGB")) if os.path.exists(demo_path) else None
+ demo_np = np.array(Image.open(demo_path).convert("RGB")) \
+     if os.path.exists(demo_path) else None
 
  # ---------------------------------------------------------------------------
- # WORKER
+ # WORKER & WRAPPER
  # ---------------------------------------------------------------------------
  @torch.no_grad()
  def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
@@ -106,12 +189,13 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
      job = generate_timestamp()
      stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "Start"))))
      try:
-         if not hi_vram: unload_complete_models(text_enc, text_enc2, img_enc, vae, trans)
          if not hi_vram:
+             unload_complete_models(text_enc, text_enc2, img_enc, vae, trans)
              fake_diffusers_current_device(text_enc, gpu)
              load_model_as_complete(text_enc2, gpu)
-         lv, cp = encode_prompt_conds(p, text_enc, text_enc2, tok, tok2)
-         lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) if cfg == 1 else encode_prompt_conds(n_p, text_enc, text_enc2, tok, tok2)
+         lv, cp = encode_prompt_conds(p, text_enc, text_enc2, tok, tok2)
+         lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) \
+             if cfg == 1 else encode_prompt_conds(n_p, text_enc, text_enc2, tok, tok2)
          lv, m = crop_or_pad_yield_mask(lv, 512)
          lv_n, m_n = crop_or_pad_yield_mask(lv_n, 512)
          H, W, _ = img.shape
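Note the cfg == 1 shortcut above: with a real guidance scale of 1, the classifier-free-guidance mix reduces to the conditional prediction alone, so the negative prompt is never encoded and zero tensors of matching shape stand in. A sketch with illustrative shapes only:

    import torch

    lv = torch.randn(1, 512, 4096)   # hypothetical prompt-embedding shape
    cp = torch.randn(1, 768)         # hypothetical pooled-embedding shape
    cfg = 1                          # real_guidance_scale

    # At scale 1 the unconditional term cancels out of the CFG formula,
    # so zero embeddings are a safe stand-in and the second encode is skipped.
    lv_n, cp_n = (torch.zeros_like(lv), torch.zeros_like(cp)) if cfg == 1 \
        else (lv, cp)  # placeholder branch: the real code encodes n_p here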
@@ -123,40 +207,38 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
          if not hi_vram: load_model_as_complete(vae, gpu)
          start_lat = vae_encode(img_pt, vae)
          if not hi_vram: load_model_as_complete(img_enc, gpu)
-         img_hidden = hf_clip_vision_encode(img_np, feat_ext, img_enc).last_hidden_state
+         img_hidden = hf_clip_vision_encode(img_np, feat_ext, img_enc).last_hidden_state
          to = trans.dtype
-         lv, lv_n, cp, cp_n, img_hidden = (x.to(to) for x in (lv, lv_n, cp, cp_n, img_hidden))
+         lv, lv_n, cp, cp_n, img_hidden = [x.to(to) for x in (lv, lv_n, cp, cp_n, img_hidden)]
          gen = torch.Generator("cpu").manual_seed(sd)
-         frames = win * 4 - 3
-         hist_lat = torch.zeros((1, 16, 19, h // 8, w // 8), dtype=torch.float32).cpu()
+         frames = win * 4 - 3
+         hist_lat = torch.zeros((1,16,19,h//8,w//8), dtype=torch.float32).cpu()
          hist_px, total = None, 0
-         pad_seq = [3] + [2] * (sections - 3) + [1, 0] if sections > 4 else list(reversed(range(sections)))
+         pad_seq = [3] + [2]*(sections-3) + [1,0] if sections>4 else list(reversed(range(sections)))
          for pad in pad_seq:
-             last = pad == 0
+             last = (pad == 0)
              if stream.input_queue.top() == "end":
-                 stream.output_queue.push(("end", None))
-                 return
+                 stream.output_queue.push(("end", None)); return
              pad_sz = pad * win
-             idx = torch.arange(0, sum([1, pad_sz, win, 1, 2, 16])).unsqueeze(0)
-             a, b, c, d, e, f = idx.split([1, pad_sz, win, 1, 2, 16], 1)
-             clean_idx = torch.cat([a, d], 1)
+             idx = torch.arange(0, sum([1,pad_sz,win,1,2,16])).unsqueeze(0)
+             a,b,c,d,e,f = idx.split([1,pad_sz,win,1,2,16],1)
+             clean_idx = torch.cat([a,d],1)
              pre = start_lat.to(hist_lat)
-             post, two, four = hist_lat[:, :, :19].split([1, 2, 16], 2)
-             clean = torch.cat([pre, post], 2)
+             post,two,four = hist_lat[:,:,:19].split([1,2,16],2)
+             clean = torch.cat([pre,post],2)
              if not hi_vram:
                  unload_complete_models()
                  move_model_to_device_with_memory_preservation(trans, gpu, keep)
              trans.initialize_teacache(tea, stp)
              def cb(d):
                  pv = vae_decode_fake(d["denoised"])
-                 pv = (pv * 255).cpu().numpy().clip(0, 255).astype(np.uint8)
+                 pv = (pv*255).cpu().numpy().clip(0,255).astype(np.uint8)
                  pv = einops.rearrange(pv, "b c t h w->(b h)(t w)c")
                  cur = d["i"] + 1
-                 stream.output_queue.push(("progress", (pv, f"{total * 4 - 3}f",
-                     make_progress_bar_html(int(100 * cur / stp), f"{cur}/{stp}"))))
+                 stream.output_queue.push(("progress", (pv, f"{total*4-3}f",
+                     make_progress_bar_html(int(100*cur/stp), f"{cur}/{stp}"))))
                  if stream.input_queue.top() == "end":
-                     stream.output_queue.push(("end", None))
-                     raise KeyboardInterrupt
+                     stream.output_queue.push(("end", None)); raise KeyboardInterrupt
              new_lat = sample_hunyuan(
                  transformer=trans, sampler="unipc", width=w, height=h, frames=frames,
                  real_guidance_scale=cfg, distilled_guidance_scale=gsc, guidance_rescale=rsc,
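The scheduling arithmetic in this hunk is worth tracing: each latent window of win frames decodes to win * 4 - 3 output frames, and clips longer than four sections use the padded sequence [3, 2, ..., 2, 1, 0] rather than a plain countdown, with pad == 0 marking the final, image-anchored section. A worked example of exactly these expressions:

    win = 9
    frames = win * 4 - 3   # 33 frames sampled per section

    for sections in (3, 4, 7):
        pad_seq = [3] + [2] * (sections - 3) + [1, 0] if sections > 4 \
            else list(reversed(range(sections)))
        print(sections, pad_seq)

    # 3 -> [2, 1, 0]
    # 4 -> [3, 2, 1, 0]               (plain countdown: sections <= 4)
    # 7 -> [3, 2, 2, 2, 2, 1, 0]      (len == sections, ends at 0 == last)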
@@ -165,50 +247,67 @@ def worker(img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf):
                  negative_prompt_embeds=lv_n, negative_prompt_embeds_mask=m_n, negative_prompt_poolers=cp_n,
                  device=gpu, dtype=torch.bfloat16, image_embeddings=img_hidden,
                  latent_indices=c, clean_latents=clean, clean_latent_indices=clean_idx,
-                 clean_latents_2x=two, clean_latent_2x_indices=e, clean_latents_4x=four, clean_latent_4x_indices=f, callback=cb)
-             if last: new_lat = torch.cat([start_lat.to(new_lat), new_lat], 2)
-             total += new_lat.shape[2]; hist_lat = torch.cat([new_lat.to(hist_lat), hist_lat], 2)
+                 clean_latents_2x=two, clean_latent_2x_indices=e,
+                 clean_latents_4x=four, clean_latent_4x_indices=f,
+                 callback=cb
+             )
+             if last:
+                 new_lat = torch.cat([start_lat.to(new_lat), new_lat], 2)
+             total += new_lat.shape[2]
+             hist_lat = torch.cat([new_lat.to(hist_lat), hist_lat], 2)
              if not hi_vram:
                  offload_model_from_device_for_memory_preservation(trans, gpu, 8)
                  load_model_as_complete(vae, gpu)
-             real = hist_lat[:, :, :total]
-             if hist_px is None: hist_px = vae_decode(real, vae).cpu()
+             real = hist_lat[:,:,:total]
+             if hist_px is None:
+                 hist_px = vae_decode(real, vae).cpu()
              else:
-                 sec_lat = win * 2 + 1 if last else win * 2
-                 cur_px = vae_decode(real[:, :, :sec_lat], vae).cpu()
-                 hist_px = soft_append_bcthw(cur_px, hist_px, win * 4 - 3)
-             if not hi_vram: unload_complete_models()
+                 sec_lat = win*2 + 1 if last else win*2
+                 cur_px = vae_decode(real[:,:,:sec_lat], vae).cpu()
+                 hist_px = soft_append_bcthw(cur_px, hist_px, win*4-3)
+             if not hi_vram:
+                 unload_complete_models()
              mp4 = os.path.join(OUT, f"{job}_{total}.mp4")
              save_bcthw_as_mp4(hist_px, mp4, fps=30, crf=crf)
              stream.output_queue.push(("file", mp4))
              if last: break
      except Exception:
-         traceback.print_exc(); stream.output_queue.push(("end", None))
+         traceback.print_exc()
+         stream.output_queue.push(("end", None))
 
- def wrapper(*args): yield from wrapper_logic(*args)
+ def wrapper(*args):
+     yield from wrapper_logic(*args)
+
  def wrapper_logic(img,p,n_p,sd,secs,win,stp,cfg,gsc,rsc,keep,tea,crf):
      global stream
-     if img is None: raise gr.Error("Upload an image.")
+     if img is None:
+         raise gr.Error("Upload an image.")
      yield None,None,"","","",gr.update(interactive=False),gr.update(interactive=True)
-     stream = AsyncStream(); async_run(worker,img,p,n_p,sd,secs,win,stp,cfg,gsc,rsc,keep,tea,crf)
-     out,log=None,""
+     stream = AsyncStream()
+     async_run(worker, img, p, n_p, sd, secs, win, stp, cfg, gsc, rsc, keep, tea, crf)
+     out, log = None, ""
      while True:
-         flag,data = stream.output_queue.next()
-         if flag=="file":
-             out = data;yield out,gr.update(),gr.update(),gr.update(),log,gr.update(interactive=False),gr.update(interactive=True)
-         if flag=="progress":
-             pv,desc,html=data; log=desc
-             yield gr.update(),gr.update(visible=True,value=pv),desc,html,log,gr.update(interactive=False),gr.update(interactive=True)
-         if flag=="end":
-             yield out,gr.update(visible=False),gr.update(),"",log,gr.update(interactive=True),gr.update(interactive=False); break
+         flag, data = stream.output_queue.next()
+         if flag == "file":
+             out = data
+             yield out, gr.update(), gr.update(), gr.update(), log, gr.update(interactive=False), gr.update(interactive=True)
+         if flag == "progress":
+             pv, desc, html = data
+             log = desc
+             yield gr.update(), gr.update(visible=True, value=pv), desc, html, log, gr.update(interactive=False), gr.update(interactive=True)
+         if flag == "end":
+             yield out, gr.update(visible=False), gr.update(), "", log, gr.update(interactive=True), gr.update(interactive=False)
+             break
 
  # ---------------------------------------------------------------------------
  # UI
  # ---------------------------------------------------------------------------
  def run_ui():
-     css = make_progress_bar_css()+"""
+     css = make_progress_bar_css() + """
  body,.gradio-container{background:#111821;color:#eee;font-family:Roboto,Arial,sans-serif}
  .gr-button,.gr-button-primary{background:#006dff;border:#006dff;font-size:16px;padding:10px 22px}
+ .gr-button-primary.save{background:#28a745;border:#28a745}
+ .gr-button-primary.save:hover{background:#3ec06d;border:#3ec06d}
  .gr-button:hover,.gr-button-primary:hover{background:#0099ff;border:#0099ff}
  input,textarea,.gr-input,.gr-textbox,.gr-slider,.gr-number{background:#1e1e1e;color:#eee;border-color:#006dff}
  .info-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(320px,1fr));gap:24px;margin-top:20px}
@@ -220,88 +319,120 @@ def run_ui():
  .info-card a{color:#37c4ff;text-decoration:none;font-weight:600}
  .info-card a:hover{text-decoration:underline;color:#6fe0ff}
  """
- quick=[["The girl dances gracefully, with clear movements, full of charm."],
-        ["A character doing some simple body movements."]]
- blk=gr.Blocks(css=css).queue()
+ positive_examples = [
+     ["A neon specter gliding through a cyberpunk cityscape at dusk."],
+     ["An astronaut exploring an alien jungle on Mars."],
+     ["A brave knight facing a dragon in a stormy courtyard."],
+     ["A futuristic city skyline at night with flying cars."]
+ ]
+ negative_examples = [
+     ["low quality, blurry, watermark, distorted, deformed"]
+ ]
+ blk = gr.Blocks(css=css).queue()
  with blk:
      gr.Markdown("## 👻 **GhostPack – Phantom Float Math Edition**")
      with gr.Tabs():
-         # ------------- Generator TAB -------------
          with gr.TabItem("🎛️ Generator"):
              with gr.Row():
                  with gr.Column(scale=6):
-                     img = gr.Image(sources=["upload"], type="numpy", label="Image", height=360, value=demo_np)
-                     prm = gr.Textbox(label="Prompt")
-                     ds = gr.Dataset(samples=quick, label="Quick List", components=[prm])
-                     ds.click(lambda x: x[0], inputs=[ds], outputs=prm)
-                     with gr.Row():
-                         b_go = gr.Button("Start")
-                         b_end = gr.Button("End", interactive=False)
-                     with gr.Group():
-                         tea = gr.Checkbox(label="Use TeaCache", value=True)
-                         npr = gr.Textbox(label="Negative Prompt", visible=False)
-                         se = gr.Number(label="Seed (-1 ⇒ random)", value=-1, precision=0, minimum=-1)
-                         rand = gr.Button("🎲 Random"); rand.click(lambda: -1, None, se)
-                         sec = gr.Slider(label="Video Length (s)", minimum=1, maximum=120, value=5, step=0.1)
-                         win = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=9, step=1, visible=False)
-                         stp = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
-                         cfg = gr.Slider(label="CFG", minimum=1, maximum=32, value=1, step=0.01, visible=False)
-                         gsc = gr.Slider(label="Distilled CFG", minimum=1, maximum=32, value=10, step=0.01)
-                         rsc = gr.Slider(label="CFG Re-Scale", minimum=0, maximum=1, value=0, step=0.01, visible=False)
-                         kee = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=6, step=0.1)
-                         crf = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=16, step=1)
+                     img = gr.Image(sources=["upload"], type="numpy", label="Input Image", height=360, value=demo_np)
+                     prm = gr.Textbox(label="Prompt", placeholder="Enter positive prompt")
+                     ds_pos = gr.Dataset(samples=positive_examples, label="🎨 Quick Prompts", components=[prm])
+                     ds_pos.click(lambda x: x[0], inputs=[ds_pos], outputs=[prm])
+                     npr = gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompt")
+                     ds_neg = gr.Dataset(samples=negative_examples, label="🚫 Neg Prompts", components=[npr])
+                     ds_neg.click(lambda x: x[0], inputs=[ds_neg], outputs=[npr])
+                     se = gr.Number(label="Seed", value=-1, precision=0, minimum=-1)
+                     sec = gr.Slider(label="Length (s)", minimum=1, maximum=120, value=5, step=0.1)
+                     win = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
+                     stp = gr.Slider(label="Steps", minimum=1, maximum=100, value=25, step=1)
+                     cfg = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+                     gsc = gr.Slider(label="Distilled CFG", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+                     rsc = gr.Slider(label="Guidance Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
+                     keep = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
+                     tea = gr.Checkbox(label="Use TeaCache", value=settings["enable_teacache"])
+                     crf = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
+                     b_go, b_end = gr.Button("Start 🚀"), gr.Button("End", interactive=False)
                  with gr.Column(scale=5):
-                     vid = gr.Video(label="Finished", autoplay=True, height=540, loop=True, show_share_button=False)
-                     pv = gr.Image(label="Progress Latents", height=220, visible=False, interactive=False)
-                     dsc = gr.Markdown(""); bar = gr.HTML(""); log = gr.Markdown("")
-             inputs=[img,prm,npr,se,sec,win,stp,cfg,gsc,rsc,kee,tea,crf]
-             b_go.click(fn=wrapper,inputs=inputs,outputs=[vid,pv,dsc,bar,log,b_go,b_end])
-             b_end.click(fn=lambda:stream.input_queue.push("end"))
-         # ------------- Build Info TAB -------------
-         with gr.TabItem("📜 Docs"):
+                     vid = gr.Video(label="Output Video", autoplay=True, height=540, loop=True)
+                     log = gr.Markdown()
+             inputs = [img, prm, npr, se, sec, win, stp, cfg, gsc, rsc, keep, tea, crf]
+             b_go.click(fn=wrapper, inputs=inputs, outputs=[vid, gr.Image(visible=False), gr.Markdown(), gr.HTML(), log, b_go, b_end])
+             b_end.click(fn=lambda: stream.input_queue.push("end"))
+         with gr.TabItem("⚙️ Settings"):
+             gr.Markdown("### ⚙️ Advanced Settings & Save")
+             lw = gr.Slider(label="Latent Window", minimum=1, maximum=33, value=settings["latent_window"], step=1)
+             cs = gr.Slider(label="CFG Scale", minimum=1, maximum=32, value=settings["cfg_scale"], step=0.01)
+             cr = gr.Slider(label="CFG Rescale", minimum=0, maximum=1, value=settings["cfg_rescale"], step=0.01)
+             gk = gr.Slider(label="GPU Keep (GB)", minimum=6, maximum=128, value=settings["gpu_keep"], step=0.1)
+             te_s = gr.Checkbox(label="Enable TeaCache", value=settings["enable_teacache"])
+             crf_s = gr.Slider(label="MP4 CRF", minimum=0, maximum=100, value=settings["mp4_crf"], step=1)
+             save_btn = gr.Button("Save Settings ✅", elem_classes="save")
+             save_status = gr.Markdown("")
+             save_btn.click(fn=save_settings, inputs=[lw, cs, cr, gk, te_s, crf_s], outputs=[save_status])
+         with gr.TabItem("📝 Logs"):
+             gr.Markdown("### 📝 GhostPack Logs")
+             log_area = gr.Textbox(label="Logs", lines=20, interactive=False, value=load_logs())
+             refresh_btn = gr.Button("🔄 Refresh Logs")
+             refresh_btn.click(fn=load_logs, inputs=None, outputs=[log_area])
+         with gr.TabItem("ℹ️ About Me"):
              gr.HTML("""
              <div class="info-grid">
-             <div class="info-card"><h2>👀 About GHOSTAI</h2><p>DevOps engineer crafting autonomous media pipelines.<br><b>Motto:</b> "Ship fast, break norms, wow creators."</p></div>
-             <div class="info-card"><h2>⚙️ Tech &amp; Tuning</h2><p>GhostPack is a <strong>Phantom Float Math</strong> fork of FramePack + Hunyuan.</p>
-             <ul><li><b>Seed</b>: −1 random</li><li><b>Steps</b>: 15-40</li><li><b>Distilled CFG</b>: 7-12</li><li><b>CRF</b>: 0-23 HQ</li></ul></div>
-             <div class="info-card"><h2>🛠️ Quick-Start</h2>
-             <ol><li>Upload or keep demo ghost.</li><li>Type vivid prompt.</li><li>Choose 8-10 s length.</li><li>Click <b>Start</b>.</li><li>Tweak CFG &amp; Steps.</li></ol></div>
-             <div class="info-card"><h2>🚀 Projects</h2>
-             <ul><li><a href="https://huggingface.co/spaces/ghostai1/GhostPack" target="_blank">GhostPack Space</a></li>
-             <li><a href="https://huggingface.co/ghostai1/GHOSTSONAFB" target="_blank">GhostSona Music (soon)</a></li>
-             <li><a href="https://huggingface.co/spaces/ghostai1/GhostPack/discussions" target="_blank">Community Forum</a></li></ul></div>
-             <div class="info-card"><h2>🌐 Connect</h2><ul><li><a href="https://huggingface.co/ghostai1" target="_blank">HuggingFace Profile</a></li></ul></div>
+             <div class="info-card">
+                 <h2>👻 GhostAI: AI Media Innovator</h2>
+                 <p>I'm a DevOps AI engineer specializing in autonomous media pipelines. My passion is crafting cutting-edge AI tools for video, audio, and automation.</p>
+                 <p><b>Mission:</b> Empower creators with fast, innovative AI solutions.<br>
+                 <b>Projects:</b> GhostPack Video Generator, GhostAI Music Generator.<br>
+                 <b>Vision:</b> Redefine media creation with AI-driven precision.</p>
+                 <p>
+                 🔗 <a href="https://huggingface.co/ghostai1">HuggingFace Profile</a><br>
+                 ✉️ <a href="mailto:ghostai@example.com">Contact Me</a>
+                 </p>
+             </div>
              </div>
              """)
-         # ------------- SHOWCASE TAB -------------
          with gr.TabItem("📸 Showcase"):
-             gr.HTML(f"""
-             <div style="text-align:center">
-             <img src="file/{os.path.join('img','banner.gif')}" alt="GhostPack Banner" style="max-width:100%;border-radius:16px;box-shadow:0 0 32px #00ffcc;margin-bottom:28px">
-             </div>
+             gr.HTML("""
              <div class="info-grid">
              <div class="info-card">
-             <h2>🎬 Demo Clip</h2>
-             <video src="file/{os.path.join('img','example_demo.mp4')}" controls style="width:100%;border-radius:10px;box-shadow:0 0 18px #0099ff"></video>
-             <p>Neon-lit spectral battle generated entirely with GhostPack at 30 FPS.</p>
-             </div>
-             <div class="info-card">
-             <h2>📸 UI Screens</h2>
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/7ABE2lOA4LOUtPfh1mhxP.png" style="width:100%;border-radius:8px;margin-bottom:12px">
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/9qNVRX2eM2iCY8xQKcOwW.png" style="width:100%;border-radius:8px;margin-bottom:12px">
-             <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/--fIS9ITg4-VqN22ySoa2.png" style="width:100%;border-radius:8px">
-             </div>
-             <div class="info-card">
-             <h2>🧭 Usage Tips</h2>
-             <ul>
-             <li><b>Cinematic 30 FPS:</b> keep <i>Latent Window</i> 9 for smooth motion.</li>
-             <li><b>Long Shots:</b> set <i>Video Length</i> 12 s + and <i>Steps</i> ≥ 35.</li>
-             <li><b>Quick Drafts:</b> enable TeaCache &amp; keep <i>Distilled CFG</i> ≤ 8.</li>
-             </ul>
+             <h2>✨ GhostPack Showcase</h2>
+             <p>Experience cinematic AI video creation with GhostPack, powered by advanced neural networks for fluid motion and stunning visuals. From cyberpunk cityscapes to alien jungles, GhostPack brings your imagination to life. Inspired by the success of the GhostAI Music Generator, this tool is your gateway to next-gen media creation.</p>
+             <div align="center">
+                 <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/2RH49llUgKsmWY7Hu8yBD.gif"
+                      alt="GhostPack Animated Banner"
+                      style="width:920px;height:260px;max-width:100%;border-radius:18px;box-shadow:0 0 48px #00ffcc;margin-bottom:36px;">
+                 <p><b>GhostPack in Action:</b> Dynamic video generation with phantom-like precision.</p>
+             </div>
+             <div align="center">
+                 <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/k8pgUlg4OvdUZpbMNTcp5.gif"
+                      alt="GhostPack Demo GIF"
+                      style="width:470px;height:auto;border-radius:18px;box-shadow:0 0 32px #ff00ff;margin-bottom:28px;">
+                 <p><b>Demo:</b> Create videos from a single image and prompt.</p>
+             </div>
+             <div style="display:flex;justify-content:center;gap:28px;">
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/7ABE2lOA4LOUtPfh1mhxP.png"
+                          alt="Main Interface"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Main Interface:</b> Intuitive controls for video generation.</p>
+                 </div>
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/9qNVRX2eM2iCY8xQKcOwW.png"
+                          alt="Advanced Settings"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Settings:</b> Fine-tune for optimal performance.</p>
+                 </div>
+                 <div>
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6421b1c68adc8881b974a89d/--fIS9ITg4-VqN22ySoa2.png"
+                          alt="Logs Display"
+                          style="width:320px;height:auto;border-radius:12px;box-shadow:0 0 18px #00ffcc;">
+                     <p><b>Logs:</b> Monitor VRAM and generation progress.</p>
+                 </div>
+             </div>
              </div>
              </div>
              """)
      blk.launch(server_name=args.server, server_port=args.port, share=args.share, inbrowser=args.inbrowser)
 
  if __name__ == "__main__":
-     run_ui()
+     run_ui()
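For readers new to the pattern used throughout this commit: worker() and wrapper_logic() communicate via message tuples on a queue, and the generator-based click handler turns each message into a live UI update. A self-contained sketch of the same architecture, with a plain queue.Queue standing in for AsyncStream (assumed semantics: ("progress", ...), ("file", path), ("end", None)):

    import queue, threading, time
    import gradio as gr

    def fake_worker(out_q):
        for i in range(3):
            time.sleep(0.5)
            out_q.put(("progress", f"step {i + 1}/3"))
        out_q.put(("file", "/tmp/demo.mp4"))   # illustrative path
        out_q.put(("end", None))

    def handler():
        out_q = queue.Queue()
        threading.Thread(target=fake_worker, args=(out_q,), daemon=True).start()
        while True:
            flag, data = out_q.get()           # mirrors stream.output_queue.next()
            if flag == "progress":
                yield data                     # each yield is one UI refresh
            elif flag == "file":
                yield f"saved {data}"
            elif flag == "end":
                yield "done"
                break

    with gr.Blocks() as demo:
        btn = gr.Button("Start")
        status = gr.Markdown()
        btn.click(fn=handler, outputs=[status])

    # demo.launch()

Running the worker on a background thread keeps the event handler free to yield, which is why the app can stream latent previews and partial MP4s while sampling is still in progress.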
 