Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -34,6 +34,7 @@ from tqdm import tqdm
 import imageio
 import av
 import uuid
+import tempfile

 from pipeline import CausalInferencePipeline
 from demo_utils.constant import ZERO_VAE_CACHE
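The newly imported `tempfile` module is not used in the hunks shown below (the chunk paths stay uuid-based in `gradio_tmp/`). Purely as an illustration of what it could be used for, here is a minimal sketch of letting the OS allocate a unique `.ts` path; the helper and its use are assumptions, not part of this commit:

```python
# Illustrative only: allocate a unique .ts chunk path with tempfile instead of
# composing "block_{idx}_{uuid}.ts" by hand. The diff itself keeps the uuid scheme.
import os
import tempfile

os.makedirs("gradio_tmp", exist_ok=True)
fd, ts_path = tempfile.mkstemp(suffix=".ts", dir="gradio_tmp")
os.close(fd)  # the muxer (e.g. av.open) will reopen the path for writing
print(ts_path)
```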
@@ -68,7 +69,7 @@ T2V_CINEMATIC_PROMPT = \
 '''1. For overly concise user inputs, reasonably infer and add details to make the video more complete and appealing without altering the original intent;\n''' \
 '''2. Enhance the main features in user descriptions (e.g., appearance, expression, quantity, race, posture, etc.), visual style, spatial relationships, and shot scales;\n''' \
 '''3. Output the entire prompt in English, retaining original text in quotes and titles, and preserving key input information;\n''' \
-'''4. Prompts should match the user
+'''4. Prompts should match the user’s intent and accurately reflect the specified style. If the user does not specify a style, choose the most appropriate style for the video;\n''' \
 '''5. Emphasize motion information and different camera movements present in the input description;\n''' \
 '''6. Your output should have natural motion attributes. For the target category described, add natural actions of the target using simple and direct verbs;\n''' \
 '''7. The revised prompt should be around 80-100 words long.\n''' \
@@ -146,75 +147,58 @@ APP_STATE = {
     "fp8_applied": False,
     "current_use_taehv": False,
     "current_vae_decoder": None,
+    "current_frames": [],
 }

-
-DOWNLOAD_FRAMES = []
-
-def frames_to_mp4_chunk(frames, filepath, fps=15):
+def frames_to_ts_file(frames, filepath, fps = 15):
     """
-    Convert frames to
+    Convert frames directly to .ts file using PyAV.
+
+    Args:
+        frames: List of numpy arrays (HWC, RGB, uint8)
+        filepath: Output file path
+        fps: Frames per second
+
+    Returns:
+        The filepath of the created file
     """
     if not frames:
         return filepath

+    height, width = frames[0].shape[:2]
+
+    # Create container for MPEG-TS format
+    container = av.open(filepath, mode='w', format='mpegts')
+
+    # Add video stream with optimized settings for streaming
+    stream = container.add_stream('h264', rate=fps)
+    stream.width = width
+    stream.height = height
+    stream.pix_fmt = 'yuv420p'
+
+    # Optimize for low latency streaming
+    stream.options = {
+        'preset': 'ultrafast',
+        'tune': 'zerolatency',
+        'crf': '23',
+        'profile': 'baseline',
+        'level': '3.0'
+    }
+
     try:
-
-
-
-
-
-        return filepath
-
-    except Exception as e:
-        print(f"❌ Error creating MP4 chunk: {e}")
-        # Fallback to PyAV if imageio fails
-        try:
-            height, width = frames[0].shape[:2]
-            container = av.open(filepath, mode='w', format='mp4')
-
-            stream = container.add_stream('h264', rate=fps)
-            stream.width = width
-            stream.height = height
-            stream.pix_fmt = 'yuv420p'
-            stream.options = {
-                'preset': 'ultrafast',
-                'tune': 'zerolatency',
-                'crf': '28'
-            }
-
-            for frame_np in frames:
-                frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
-                frame = frame.reformat(format=stream.pix_fmt)
-                for packet in stream.encode(frame):
-                    container.mux(packet)
-
-            for packet in stream.encode():
+        for frame_np in frames:
+            frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
+            frame = frame.reformat(format=stream.pix_fmt)
+            for packet in stream.encode(frame):
                 container.mux(packet)
-
-
-
+
+        for packet in stream.encode():
+            container.mux(packet)

-
-
-
-
-def create_download_mp4():
-    global DOWNLOAD_FRAMES
-    if not DOWNLOAD_FRAMES:
-        return None
-    try:
-        os.makedirs("downloads", exist_ok=True)
-        timestamp = int(time.time())
-        mp4_path = f"downloads/video_{timestamp}.mp4"
-        with imageio.get_writer(mp4_path, fps=args.fps, codec='libx264', quality=8) as writer:
-            for frame in DOWNLOAD_FRAMES:
-                writer.append_data(frame)
-        print(f"✅ Download MP4 created: {mp4_path}")
-        return mp4_path
-    except Exception as e:
-        print(f"❌ Download error: {e}")
-        return None
+    finally:
+        container.close()
+
+    return filepath

 def initialize_vae_decoder(use_taehv=False, use_trt=False):
     if use_trt:
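A quick way to sanity-check the new chunk writer is to feed it synthetic frames and re-open the result with PyAV. This is a minimal sketch, not part of the commit; it assumes `frames_to_ts_file` as defined above is available in the current scope (for example, pasted into a scratch script), and the frame size is arbitrary:

```python
# Minimal sanity check for the MPEG-TS chunk writer (not part of the commit).
# Assumes frames_to_ts_file from above is in scope.
import av
import numpy as np

# 16 random 320x576 RGB frames, shaped (H, W, C) uint8 as the docstring requires.
frames = [np.random.randint(0, 256, (320, 576, 3), dtype=np.uint8) for _ in range(16)]
path = frames_to_ts_file(frames, "test_chunk.ts", fps=15)

# Re-open the chunk and confirm the stream really is H.264 in an MPEG-TS container.
with av.open(path) as container:
    stream = container.streams.video[0]
    decoded = sum(1 for _ in container.decode(stream))
    print(container.format.name, stream.codec_context.name, decoded)
```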
@@ -275,17 +259,15 @@ pipeline.to(dtype=torch.float16).to(gpu)

 @torch.no_grad()
 @spaces.GPU
-def video_generation_handler_streaming(prompt, seed=42, fps=15):
+def video_generation_handler_streaming(prompt, seed=42, fps=15, save_frames=True):
     """
-    Generator function that yields
+    Generator function that yields .ts video chunks using PyAV for streaming.
+    Now optimized for block-based processing.
     """
-    global DOWNLOAD_FRAMES
-    DOWNLOAD_FRAMES = []  # Reset frames
-
     if seed == -1:
         seed = random.randint(0, 2**32 - 1)

-    print(f"🎬 Starting
+    print(f"🎬 Starting PyAV streaming: '{prompt}', seed: {seed}")

     # Setup
     conditional_dict = text_encoder(text_prompts=[prompt])
@@ -372,13 +354,14 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC

             all_frames_from_block.append(frame_np)
-            DOWNLOAD_FRAMES.append(frame_np)  # Store for download
             total_frames_yielded += 1

-            # Yield status update for each frame
+            # Yield status update for each frame (cute tracking!)
             blocks_completed = idx
             current_block_progress = (frame_idx + 1) / pixels.shape[1]
             total_progress = (blocks_completed + current_block_progress) / num_blocks * 100
+
+            # Cap at 100% to avoid going over
             total_progress = min(total_progress, 100.0)

             frame_status_html = (
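The download path added later in this commit reads `APP_STATE["current_frames"]`, and the handler gains a `save_frames` flag, but the per-frame append that would populate that buffer is not visible in these hunks (only the old `DOWNLOAD_FRAMES.append` is removed). A hypothetical sketch of that bookkeeping, purely illustrative:

```python
# Hypothetical bookkeeping (not shown in this diff): mirror each decoded frame
# into a shared buffer so download_video(), wired up further below, has data.
import numpy as np

APP_STATE = {"current_frames": []}  # mirrors the key this commit adds to APP_STATE

def record_frame(frame_np, save_frames=True):
    """Append a decoded HWC uint8 frame to the download buffer when enabled."""
    if save_frames:
        APP_STATE["current_frames"].append(frame_np)

record_frame(np.zeros((320, 576, 3), dtype=np.uint8))
print(len(APP_STATE["current_frames"]))
```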
@@ -393,21 +376,25 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
                 f"</div>"
             )

+            # Yield None for video but update status (frame-by-frame tracking)
             yield None, frame_status_html

-        #
+        # Encode entire block as one chunk immediately
         if all_frames_from_block:
             print(f"📹 Encoding block {idx} with {len(all_frames_from_block)} frames")

             try:
                 chunk_uuid = str(uuid.uuid4())[:8]
-
-
+                ts_filename = f"block_{idx:04d}_{chunk_uuid}.ts"
+                ts_path = os.path.join("gradio_tmp", ts_filename)

-
+                frames_to_ts_file(all_frames_from_block, ts_path, fps)

-                #
-
+                # Calculate final progress for this block
+                total_progress = (idx + 1) / num_blocks * 100
+
+                # Yield the actual video chunk
+                yield ts_path, gr.update()

             except Exception as e:
                 print(f"⚠️ Error encoding block {idx}: {e}")
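The handler therefore yields `(video_chunk, status)` pairs: `None` plus HTML for per-frame status updates, and a `.ts` path plus `gr.update()` once a block is encoded. A non-Gradio consumer can drive it the same way, and because each MPEG-TS segment is written by its own muxer, segments can simply be byte-concatenated. A rough sketch, assuming the models behind the handler are already loaded and using a made-up prompt:

```python
# Drive the streaming generator outside of Gradio and stitch the chunks together.
# Assumes the Space's models are loaded; the prompt below is just an example.
chunk_paths = []
for video_chunk, status_html in video_generation_handler_streaming("a red fox running", seed=7, fps=15):
    if video_chunk is not None:      # per-block yield: path to a .ts segment
        chunk_paths.append(video_chunk)
    # else: per-frame yield, only the HTML status changed

# MPEG-TS segments can be concatenated at the byte level into one playable stream.
with open("full_video.ts", "wb") as out:
    for path in chunk_paths:
        with open(path, "rb") as segment:
            out.write(segment.read())
```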
@@ -428,13 +415,41 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         f" 📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
         f" </p>"
         f" <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
-        f" 🎬 Playback: {fps} FPS • 📁 Format:
+        f" 🎬 Playback: {fps} FPS • 📁 Format: MPEG-TS/H.264"
         f" </p>"
         f" </div>"
         f"</div>"
     )
     yield None, final_status_html
-    print(f"
+    print(f" PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
+
+def save_frames_as_video(frames, fps=15):
+    """
+    Convert frames to a downloadable MP4 video file.
+
+    Args:
+        frames: List of numpy arrays (HWC, RGB, uint8)
+        fps: Frames per second
+
+    Returns:
+        Path to the saved video file
+    """
+    if not frames:
+        return None
+
+    # Create a temporary file with a unique name
+    temp_file = os.path.join("gradio_tmp", f"download_{uuid.uuid4()}.mp4")
+
+    # Use imageio to write the video file
+    try:
+        writer = imageio.get_writer(temp_file, fps=fps, codec='h264', quality=9)
+        for frame in frames:
+            writer.append_data(frame)
+        writer.close()
+        return temp_file
+    except Exception as e:
+        print(f"Error saving video: {e}")
+        return None

 # --- Gradio UI Layout ---
 with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
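Note that `save_frames_as_video` opens the imageio writer manually and only closes it on the success path, whereas the removed `create_download_mp4` used the writer as a context manager, which also covers the error path. A sketch of that variant under a hypothetical name:

```python
# Sketch: same output as save_frames_as_video, but the `with` block guarantees
# the writer is closed (and the file finalized) even if append_data raises.
import os
import uuid
import imageio

def save_frames_as_video_cm(frames, fps=15):
    if not frames:
        return None
    temp_file = os.path.join("gradio_tmp", f"download_{uuid.uuid4()}.mp4")
    try:
        with imageio.get_writer(temp_file, fps=fps, codec='h264', quality=9) as writer:
            for frame in frames:
                writer.append_data(frame)
        return temp_file
    except Exception as e:
        print(f"Error saving video: {e}")
        return None
```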
@@ -504,20 +519,31 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                 label="Generation Status"
             )

-
-
-                label="📥 Download MP4",
-                value=create_download_mp4,
-                variant="secondary"
-            )
+            download_btn = gr.Button("💾 Download Video", variant="secondary")
+            download_output = gr.File(label="Download")

     # Connect the generator to the streaming video
     start_btn.click(
-        fn=video_generation_handler_streaming,
+        fn=lambda p, s, f: (APP_STATE.update({"current_frames": []}) or video_generation_handler_streaming(p, s, f)),
         inputs=[prompt, seed, fps],
         outputs=[streaming_video, status_display]
     )

+    # Function to handle download button click
+    def download_video(fps):
+        if not APP_STATE.get("current_frames"):
+            return None
+        video_path = save_frames_as_video(APP_STATE["current_frames"], fps)
+        return video_path
+
+    # Connect download button
+    download_btn.click(
+        fn=download_video,
+        inputs=[fps],
+        outputs=[download_output],
+        show_progress=True
+    )
+
     enhance_button.click(
         fn=enhance_prompt,
         inputs=[prompt],
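The lambda wrapper is worth a second look: Gradio decides whether to stream outputs by checking whether `fn` itself is a generator function, so a lambda that merely returns the generator object may not be treated as streaming. A small wrapper that resets the frame buffer while preserving generator semantics could look like the following sketch (the name `start_generation` is hypothetical; the other names come from this file):

```python
# Sketch of an alternative to the lambda: a real generator function that clears
# the shared frame buffer and then delegates to the streaming handler.
def start_generation(prompt, seed, fps):
    APP_STATE["current_frames"] = []
    yield from video_generation_handler_streaming(prompt, seed, fps)

start_btn.click(
    fn=start_generation,
    inputs=[prompt, seed, fps],
    outputs=[streaming_video, status_display],
)
```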
@@ -530,12 +556,10 @@ if __name__ == "__main__":
     import shutil
     shutil.rmtree("gradio_tmp")
     os.makedirs("gradio_tmp", exist_ok=True)
-    os.makedirs("downloads", exist_ok=True)

     print("🚀 Starting Self-Forcing Streaming Demo")
     print(f"📁 Temporary files will be stored in: gradio_tmp/")
-    print(f"
-    print(f"🎯 Chunk encoding: MP4/H.264 (more compatible)")
+    print(f"🎯 Chunk encoding: PyAV (MPEG-TS/H.264)")
     print(f"⚡ GPU acceleration: {gpu}")

     demo.queue().launch(
@@ -546,8 +570,6 @@ if __name__ == "__main__":
         max_threads=40,
         mcp_server=True
     )
-
-
 # import subprocess
 # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)