Spaces:

fau
/

videoxity

Running

App Files Files Community

zamalali committed 17 days ago

Commit

7cf4dc6

1 Parent(s): b7128c2

Improve VL pipeline

Browse files

Files changed (3) hide show

.gradio/certificate.pem +31 -0
__pycache__/main.cpython-313.pyc +0 -0
app.py +33 -27

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (16.2 kB). View file

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ load_dotenv()
 if not os.getenv("HF_TOKEN"):
     raise ValueError("❌ Error: HF_TOKEN not found in .env file")
-@spaces.GPU
 def process_video(video_path, query, progress=gr.Progress()):
     """Scene‐filtering tab: remove scenes matching the query."""
     try:
@@ -77,7 +77,7 @@ def process_video(video_path, query, progress=gr.Progress()):
     except Exception as e:
         return None, f"❌ Error: {e}"
-@spaces.GPU
 def generate_video_description(video_path, progress=gr.Progress()):
     """Video‐description tab: full scene‐by‐scene summary."""
     try:
@@ -96,18 +96,26 @@ def generate_video_description(video_path, progress=gr.Progress()):
     except Exception as e:
         return f"❌ Error: {e}"
 @spaces.GPU
 def get_frame_description(video_path, frame_number):
-    """Frame‐analysis tab: caption a single frame."""
     try:
-        cap = cv2.VideoCapture(video_path)
-        cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_number))
-        ret, frame = cap.read()
-        cap.release()
-        if not ret:
-            return "❌ Invalid frame number"
-        return f"Frame {frame_number}:\n{generate_scene_caption(frame)}"
     except Exception as e:
         return f"❌ Error: {e}"
@@ -127,10 +135,12 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
         line-height: 1.5;
     }
     .tech-stack {
-        background: #f5f5f5;
         padding: 1em;
         border-radius: 8px;
         margin: 1em 0;
     }
 """) as demo:
     gr.Markdown("""
@@ -139,8 +149,10 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
     A powerful playground for video analysis and manipulation using state-of-the-art Vision-Language models.
     <div class="description">
-    This application demonstrates the capabilities of modern AI in video processing, offering a foundation for developers to build upon and optimize.
     Whether you're exploring scene detection, content filtering, or video summarization, Videoxity provides the tools to experiment with and enhance video understanding.
     </div>
     <div class="tech-stack">
@@ -158,7 +170,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
         with gr.TabItem("Frames to Cut"):
             gr.Markdown("""
             ### Remove specific scenes from your video
-            Upload a video and describe which scenes you want to remove. The AI will analyze each scene and cut out the matching ones.
             Examples:
             - "Remove the part where there is a cat in the video"
@@ -194,7 +206,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
         with gr.TabItem("Video Description"):
             gr.Markdown("""
             ### Generate a comprehensive description of your video
-            Get AI-generated descriptions for all scenes in your video.
             """)
             with gr.Row():
                 with gr.Column():
@@ -215,28 +227,22 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
         # 3) Frame Analysis
         with gr.TabItem("Frame Analysis"):
             gr.Markdown("""
-            ### Analyze specific frames in your video
-            Get detailed descriptions for individual frames.
             """)
             with gr.Row():
                 with gr.Column():
                     vid3 = gr.Video(label="Upload Video")
-                    fn3 = gr.Number(
-                        label="Frame Number",
-                        value=0,
-                        precision=0,
-                        minimum=0
-                    )
-                    btn3 = gr.Button("Analyze Frame", variant="primary")
                 with gr.Column():
                     outFrm = gr.Textbox(
-                        label="Frame Description",
-                        lines=5,
                         show_copy_button=True
                     )
             btn3.click(
                 fn=get_frame_description,
-                inputs=[vid3, fn3],
                 outputs=[outFrm]
             )

 if not os.getenv("HF_TOKEN"):
     raise ValueError("❌ Error: HF_TOKEN not found in .env file")
 def process_video(video_path, query, progress=gr.Progress()):
     """Scene‐filtering tab: remove scenes matching the query."""
     try:
     except Exception as e:
         return None, f"❌ Error: {e}"
 def generate_video_description(video_path, progress=gr.Progress()):
     """Video‐description tab: full scene‐by‐scene summary."""
     try:
     except Exception as e:
         return f"❌ Error: {e}"
 @spaces.GPU
 def get_frame_description(video_path, frame_number):
+    """Frame‐analysis tab: show scene descriptions."""
     try:
+        # Get scenes and keyframes
+        scenes = detect_scenes(video_path)
+        keyframes = extract_keyframes(video_path, scenes)
+        # Generate captions for all scenes
+        captions = [generate_scene_caption(frame) for _, frame in keyframes]
+        # Format the output with timestamps
+        output = []
+        for i, ((start, end), caption) in enumerate(zip(scenes, captions)):
+            start_time = start.get_seconds()
+            end_time = end.get_seconds()
+            output.append(f"Scene {i+1} ({start_time:.1f}s - {end_time:.1f}s):\n{caption}\n")
+        return "\n".join(output)
     except Exception as e:
         return f"❌ Error: {e}"
         line-height: 1.5;
     }
     .tech-stack {
+        background: var(--background-fill-secondary);
         padding: 1em;
         border-radius: 8px;
         margin: 1em 0;
+        border: 1px solid var(--border-color-primary);
+        color: var(--body-text-color);
     }
 """) as demo:
     gr.Markdown("""
     A powerful playground for video analysis and manipulation using state-of-the-art Vision-Language models.
     <div class="description">
+    This application demonstrates the capabilities of modern computer vision and natural language processing models in video processing, offering a foundation for developers to build upon and optimize.
     Whether you're exploring scene detection, content filtering, or video summarization, Videoxity provides the tools to experiment with and enhance video understanding.
+    ⚠️ Note: This demo is running entirely on CPU. For faster processing, either run it locally or duplicate the space.
     </div>
     <div class="tech-stack">
         with gr.TabItem("Frames to Cut"):
             gr.Markdown("""
             ### Remove specific scenes from your video
+            Upload a video and describe which scenes you want to remove. The BLIP Vision-Language model will analyze each scene and cut out the matching ones.
             Examples:
             - "Remove the part where there is a cat in the video"
         with gr.TabItem("Video Description"):
             gr.Markdown("""
             ### Generate a comprehensive description of your video
+            Get BLIP-generated scene descriptions and a Llama 3.1-powered narrative summary of your video.
             """)
             with gr.Row():
                 with gr.Column():
         # 3) Frame Analysis
         with gr.TabItem("Frame Analysis"):
             gr.Markdown("""
+            ### Analyze scenes in your video
+            Get detailed scene descriptions using BLIP's image captioning model, with precise timestamps for each scene.
             """)
             with gr.Row():
                 with gr.Column():
                     vid3 = gr.Video(label="Upload Video")
+                    btn3 = gr.Button("Analyze Scenes", variant="primary")
                 with gr.Column():
                     outFrm = gr.Textbox(
+                        label="Scene Descriptions",
+                        lines=15,
                         show_copy_button=True
                     )
             btn3.click(
                 fn=get_frame_description,
+                inputs=[vid3],
                 outputs=[outFrm]
             )