zamalali committed on
Commit
7cf4dc6
·
1 Parent(s): b7128c2

Improve VL pipeline

Browse files
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
__pycache__/main.cpython-313.pyc ADDED
Binary file (16.2 kB). View file
 
app.py CHANGED
@@ -21,7 +21,7 @@ load_dotenv()
21
  if not os.getenv("HF_TOKEN"):
22
  raise ValueError("❌ Error: HF_TOKEN not found in .env file")
23
 
24
- @spaces.GPU
25
  def process_video(video_path, query, progress=gr.Progress()):
26
  """Scene‐filtering tab: remove scenes matching the query."""
27
  try:
@@ -77,7 +77,7 @@ def process_video(video_path, query, progress=gr.Progress()):
77
  except Exception as e:
78
  return None, f"❌ Error: {e}"
79
 
80
- @spaces.GPU
81
  def generate_video_description(video_path, progress=gr.Progress()):
82
  """Video‐description tab: full scene‐by‐scene summary."""
83
  try:
@@ -96,18 +96,26 @@ def generate_video_description(video_path, progress=gr.Progress()):
96
  except Exception as e:
97
  return f"❌ Error: {e}"
98
 
 
99
  @spaces.GPU
100
  def get_frame_description(video_path, frame_number):
101
- """Frame‐analysis tab: caption a single frame."""
102
  try:
103
- cap = cv2.VideoCapture(video_path)
104
- cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_number))
105
- ret, frame = cap.read()
106
- cap.release()
107
-
108
- if not ret:
109
- return "❌ Invalid frame number"
110
- return f"Frame {frame_number}:\n{generate_scene_caption(frame)}"
 
 
 
 
 
 
 
111
  except Exception as e:
112
  return f"❌ Error: {e}"
113
 
@@ -127,10 +135,12 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
127
  line-height: 1.5;
128
  }
129
  .tech-stack {
130
- background: #f5f5f5;
131
  padding: 1em;
132
  border-radius: 8px;
133
  margin: 1em 0;
 
 
134
  }
135
  """) as demo:
136
  gr.Markdown("""
@@ -139,8 +149,10 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
139
  A powerful playground for video analysis and manipulation using state-of-the-art Vision-Language models.
140
 
141
  <div class="description">
142
- This application demonstrates the capabilities of modern AI in video processing, offering a foundation for developers to build upon and optimize.
143
  Whether you're exploring scene detection, content filtering, or video summarization, Videoxity provides the tools to experiment with and enhance video understanding.
 
 
144
  </div>
145
 
146
  <div class="tech-stack">
@@ -158,7 +170,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
158
  with gr.TabItem("Frames to Cut"):
159
  gr.Markdown("""
160
  ### Remove specific scenes from your video
161
- Upload a video and describe which scenes you want to remove. The AI will analyze each scene and cut out the matching ones.
162
 
163
  Examples:
164
  - "Remove the part where there is a cat in the video"
@@ -194,7 +206,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
194
  with gr.TabItem("Video Description"):
195
  gr.Markdown("""
196
  ### Generate a comprehensive description of your video
197
- Get AI-generated descriptions for all scenes in your video.
198
  """)
199
  with gr.Row():
200
  with gr.Column():
@@ -215,28 +227,22 @@ with gr.Blocks(theme=gr.themes.Soft(), css="""
215
  # 3) Frame Analysis
216
  with gr.TabItem("Frame Analysis"):
217
  gr.Markdown("""
218
- ### Analyze specific frames in your video
219
- Get detailed descriptions for individual frames.
220
  """)
221
  with gr.Row():
222
  with gr.Column():
223
  vid3 = gr.Video(label="Upload Video")
224
- fn3 = gr.Number(
225
- label="Frame Number",
226
- value=0,
227
- precision=0,
228
- minimum=0
229
- )
230
- btn3 = gr.Button("Analyze Frame", variant="primary")
231
  with gr.Column():
232
  outFrm = gr.Textbox(
233
- label="Frame Description",
234
- lines=5,
235
  show_copy_button=True
236
  )
237
  btn3.click(
238
  fn=get_frame_description,
239
- inputs=[vid3, fn3],
240
  outputs=[outFrm]
241
  )
242
 
 
21
  if not os.getenv("HF_TOKEN"):
22
  raise ValueError("❌ Error: HF_TOKEN not found in .env file")
23
 
24
+
25
  def process_video(video_path, query, progress=gr.Progress()):
26
  """Scene‐filtering tab: remove scenes matching the query."""
27
  try:
 
77
  except Exception as e:
78
  return None, f"❌ Error: {e}"
79
 
80
+
81
  def generate_video_description(video_path, progress=gr.Progress()):
82
  """Video‐description tab: full scene‐by‐scene summary."""
83
  try:
 
96
  except Exception as e:
97
  return f"❌ Error: {e}"
98
 
99
+
100
  @spaces.GPU
101
  def get_frame_description(video_path, frame_number):
102
+ """Frame‐analysis tab: show scene descriptions."""
103
  try:
104
+ # Get scenes and keyframes
105
+ scenes = detect_scenes(video_path)
106
+ keyframes = extract_keyframes(video_path, scenes)
107
+
108
+ # Generate captions for all scenes
109
+ captions = [generate_scene_caption(frame) for _, frame in keyframes]
110
+
111
+ # Format the output with timestamps
112
+ output = []
113
+ for i, ((start, end), caption) in enumerate(zip(scenes, captions)):
114
+ start_time = start.get_seconds()
115
+ end_time = end.get_seconds()
116
+ output.append(f"Scene {i+1} ({start_time:.1f}s - {end_time:.1f}s):\n{caption}\n")
117
+
118
+ return "\n".join(output)
119
  except Exception as e:
120
  return f"❌ Error: {e}"
121
 
 
135
  line-height: 1.5;
136
  }
137
  .tech-stack {
138
+ background: var(--background-fill-secondary);
139
  padding: 1em;
140
  border-radius: 8px;
141
  margin: 1em 0;
142
+ border: 1px solid var(--border-color-primary);
143
+ color: var(--body-text-color);
144
  }
145
  """) as demo:
146
  gr.Markdown("""
 
149
  A powerful playground for video analysis and manipulation using state-of-the-art Vision-Language models.
150
 
151
  <div class="description">
152
+ This application demonstrates the capabilities of modern computer vision and natural language processing models in video processing, offering a foundation for developers to build upon and optimize.
153
  Whether you're exploring scene detection, content filtering, or video summarization, Videoxity provides the tools to experiment with and enhance video understanding.
154
+
155
+ ⚠️ Note: This demo is running entirely on CPU. For faster processing, either run it locally or duplicate the space.
156
  </div>
157
 
158
  <div class="tech-stack">
 
170
  with gr.TabItem("Frames to Cut"):
171
  gr.Markdown("""
172
  ### Remove specific scenes from your video
173
+ Upload a video and describe which scenes you want to remove. The BLIP Vision-Language model will analyze each scene and cut out the matching ones.
174
 
175
  Examples:
176
  - "Remove the part where there is a cat in the video"
 
206
  with gr.TabItem("Video Description"):
207
  gr.Markdown("""
208
  ### Generate a comprehensive description of your video
209
+ Get BLIP-generated scene descriptions and a Llama 3.1-powered narrative summary of your video.
210
  """)
211
  with gr.Row():
212
  with gr.Column():
 
227
  # 3) Frame Analysis
228
  with gr.TabItem("Frame Analysis"):
229
  gr.Markdown("""
230
+ ### Analyze scenes in your video
231
+ Get detailed scene descriptions using BLIP's image captioning model, with precise timestamps for each scene.
232
  """)
233
  with gr.Row():
234
  with gr.Column():
235
  vid3 = gr.Video(label="Upload Video")
236
+ btn3 = gr.Button("Analyze Scenes", variant="primary")
 
 
 
 
 
 
237
  with gr.Column():
238
  outFrm = gr.Textbox(
239
+ label="Scene Descriptions",
240
+ lines=15,
241
  show_copy_button=True
242
  )
243
  btn3.click(
244
  fn=get_frame_description,
245
+ inputs=[vid3],
246
  outputs=[outFrm]
247
  )
248