Spaces:

Luigi
/

SmolVLM2-on-llama.cpp

Running

App Files Files Community

Luigi committed 20 days ago

Commit

292fb3c

1 Parent(s): 2529cb3

decouple inference from streaming

Browse files

Files changed (1) hide show

app.py +32 -1

app.py CHANGED Viewed

@@ -122,18 +122,49 @@ RTC_CONFIG = RTCConfiguration({
     "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
 })
 # Pre-change version of the video processor: captions at most one frame per
 # `interval` seconds and passes every frame through unmodified.
 class CaptionProcessor(VideoProcessorBase):
     def __init__(self):
         # Minimum gap between two captioning calls, in seconds (compared
         # against time.time() differences in recv).
         self.interval = 1.0
         # Timestamp of the most recent captioning attempt.
         self.last_time = time.time()
         # Latest caption; stored on the instance, not drawn onto the frame here.
         self.caption = ""
     # Called with each incoming frame; returns a frame rebuilt from the same
     # pixel array.
     def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
         img = frame.to_ndarray(format="bgr24")
         now = time.time()
         if now - self.last_time >= self.interval:
             self.last_time = now
             # caption_frame runs synchronously here, so recv does not return
             # the frame until it finishes; this is the line the commit removes.
-            self.caption = caption_frame(img)
         return av.VideoFrame.from_ndarray(img, format="bgr24")
 ctx = webrtc_streamer(

     "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]
 })
+import concurrent.futures
 class CaptionProcessor(VideoProcessorBase):
     """Overlay the latest caption on every frame while captioning runs in a worker thread."""
     # Text drawn until the first caption is available. Kept ASCII-only because
     # cv2.putText replaces symbols its Hershey fonts cannot render with "?".
     PLACEHOLDER = "...thinking..."
     def __init__(self):
         # Minimum gap between two inference submissions, in seconds.
         self.interval = 1.0
         # Timestamp of the last time the interval check fired.
         self.last_time = time.time()
         # Most recent caption (or error text) produced by the worker.
         self.caption = ""
         # A single worker, so at most one caption_frame call runs at a time.
         self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
         # Future of the in-flight inference, or None when the worker is idle.
         self.future = None
     def recv(self, frame: av.VideoFrame) -> av.VideoFrame:
         """Return `frame` with the last known caption drawn on it.
         Inference is submitted to the executor, never run inline, so this
         method does not wait for caption_frame.
         """
         img = frame.to_ndarray(format="bgr24")
         now = time.time()
         # 1) Harvest a finished inference *before* scheduling a new one, so a
         #    result that completed since the previous frame is never
         #    overwritten by the next submission.
         if self.future is not None and self.future.done():
             try:
                 self.caption = self.future.result()
             except Exception as e:
                 # Surface the failure in the overlay instead of killing the stream.
                 self.caption = f"[error: {e}]"
             self.future = None
         # 2) Schedule a new inference once the interval has elapsed, but only
         #    when the worker is idle (any finished future was cleared above).
         if now - self.last_time >= self.interval:
             self.last_time = now
             if self.future is None:
                 # Copy the pixels: the overlay drawn below mutates `img` while
                 # the worker thread may still be reading its input.
                 self.future = self.executor.submit(caption_frame, img.copy())
         # 3) Draw the last caption onto every frame immediately.
         cv2.putText(
             img,
             self.caption or self.PLACEHOLDER,
             org=(10, img.shape[0] - 20),
             fontFace=cv2.FONT_HERSHEY_SIMPLEX,
             fontScale=0.6,
             color=(255, 255, 255),
             thickness=2,
             lineType=cv2.LINE_AA,
         )
         return av.VideoFrame.from_ndarray(img, format="bgr24")
 ctx = webrtc_streamer(