Spaces:

segments
/

count-by-class

Running

App Files Files Community

Tomatillo committed 21 days ago

Commit

60f5dd2

verified ·

1 Parent(s): dc853ae

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +122 -67

src/streamlit_app.py CHANGED Viewed

@@ -1,8 +1,13 @@
 import streamlit as st
 import io
 import csv
-from datetime import datetime
 from segments import SegmentsClient
 from get_labels_from_samples import (
     get_samples as get_samples_objects,
     export_frames_and_annotations,
@@ -44,6 +49,95 @@ def parse_classes(input_str: str) -> list:
     return sorted(set(classes))
 def generate_csv(metrics: list, dataset_identifier: str) -> str:
     """
     Generate CSV content from list of per-sample metrics.
@@ -102,6 +196,9 @@ if api_key and dataset_identifier:
 if is_multisensor:
     sensor_select = st.selectbox("Choose sensor (optional)", options=['All sensors'] + sensor_names)
 if run_button:
     st.session_state.csv_content = None
     st.session_state.error = None
@@ -122,75 +219,33 @@ if run_button:
             st.info("Checking dataset type...")
         try:
             target_classes = parse_classes(classes_input)
-            client = init_client(api_key)
             metrics = []
             # Update loader after dataset type check
             if status_ctx is not None:
                 status_ctx.update(label="Dataset type checked. Processing samples...", state="running")
-            for sample in samples_objects:
-                try:
-                    label = client.get_label(sample.uuid)
-                    labelset = getattr(label, 'labelset', '') or ''
-                    labeled_by = getattr(label, 'created_by', '') or ''
-                    reviewed_by = getattr(label, 'reviewed_by', '') or ''
-                    if is_multisensor and sensor_select and sensor_select != 'All sensors':
-                        frames_list = export_sensor_frames_and_annotations(label, sensor_select)
-                        sensor_val = sensor_select
-                        num_frames = len(frames_list)
-                        total_annotations = sum(len(f['annotations']) for f in frames_list)
-                        matching_annotations = sum(
-                            1
-                            for f in frames_list
-                            for ann in f['annotations']
-                            if getattr(ann, 'category_id', None) in target_classes
-                        )
-                    elif is_multisensor and (not sensor_select or sensor_select == 'All sensors'):
-                        all_sensor_frames = export_all_sensor_frames_and_annotations(label)
-                        for sensor_name, frames_list in all_sensor_frames.items():
-                            num_frames = len(frames_list)
-                            total_annotations = sum(len(f['annotations']) for f in frames_list)
-                            matching_annotations = sum(
-                                1
-                                for f in frames_list
-                                for ann in f['annotations']
-                                if getattr(ann, 'category_id', None) in target_classes
-                            )
-                            metrics.append({
-                                'name': getattr(sample, 'name', sample.uuid),
-                                'uuid': sample.uuid,
-                                'labelset': labelset,
-                                'sensor': sensor_name,
-                                'num_frames': num_frames,
-                                'total_annotations': total_annotations,
-                                'matching_annotations': matching_annotations,
-                                'labeled_by': labeled_by,
-                                'reviewed_by': reviewed_by
-                            })
-                        continue
-                    else:
-                        frames_list = export_frames_and_annotations(label)
-                        sensor_val = ''
-                        num_frames = len(frames_list)
-                        total_annotations = sum(len(f['annotations']) for f in frames_list)
-                        matching_annotations = sum(
-                            1
-                            for f in frames_list
-                            for ann in f['annotations']
-                            if getattr(ann, 'category_id', None) in target_classes
-                        )
-                    metrics.append({
-                        'name': getattr(sample, 'name', sample.uuid),
-                        'uuid': sample.uuid,
-                        'labelset': labelset,
-                        'sensor': sensor_val if is_multisensor else '',
-                        'num_frames': num_frames,
-                        'total_annotations': total_annotations,
-                        'matching_annotations': matching_annotations,
-                        'labeled_by': labeled_by,
-                        'reviewed_by': reviewed_by
-                    })
-                except Exception as e:
-                    continue
             if not metrics:
                 st.session_state.error = "No metrics could be generated for the dataset."
             else:
@@ -213,4 +268,4 @@ if st.session_state.csv_content:
         data=st.session_state.csv_content,
         file_name=filename,
         mime="text/csv"
-    )

+#!/usr/bin/env python3
 import streamlit as st
 import io
 import csv
+import concurrent.futures
 from segments import SegmentsClient
+from datetime import datetime
+import sys
+import os
 from get_labels_from_samples import (
     get_samples as get_samples_objects,
     export_frames_and_annotations,
     return sorted(set(classes))
+def _count_from_frames(frames, target_set):
+    """Helper to count frames, total annotations, and matching annotations directly."""
+    if not frames:
+        return 0, 0, 0
+    num_frames = len(frames)
+    total_annotations = 0
+    matching_annotations = 0
+    for f in frames:
+        anns = getattr(f, 'annotations', [])
+        total_annotations += len(anns)
+        if target_set:
+            for ann in anns:
+                if getattr(ann, 'category_id', None) in target_set:
+                    matching_annotations += 1
+    return num_frames, total_annotations, matching_annotations
def compute_metrics_for_sample(sample, api_key, target_set, is_multisensor, sensor_select):
    """Fetch the label for one sample and compute its annotation metrics.

    A client is created per call via ``init_client(api_key)`` and the label
    is fetched with ``client.get_label(sample.uuid)``.

    Args:
        sample: Sample object exposing ``uuid`` and, optionally, ``name``.
        api_key: API key forwarded to ``init_client``.
        target_set: Category ids counted as "matching" annotations.
        is_multisensor: When true, frames are read per sensor from
            ``label.attributes.sensors``; otherwise from
            ``label.attributes.frames``.
        sensor_select: Name of the sensor to report, or ``'All sensors'`` /
            falsy to report every sensor. Ignored for single-sensor data.

    Returns:
        A list of metric dicts: one per sensor for "all sensors", one for a
        selected sensor or a single-sensor dataset. The list is empty when
        the selected sensor is not present on the label or when fetching or
        processing fails (the failure is logged, not raised).
    """
    import logging  # local import keeps this block self-contained

    try:
        client = init_client(api_key)
        label = client.get_label(sample.uuid)
        labelset = getattr(label, 'labelset', '') or ''
        labeled_by = getattr(label, 'created_by', '') or ''
        reviewed_by = getattr(label, 'reviewed_by', '') or ''
        attributes = getattr(label, 'attributes', None)

        def build_row(sensor_label, frames):
            # Single place that defines the row schema for every branch.
            num_frames, total_annotations, matching_annotations = _count_from_frames(frames, target_set)
            return {
                'name': getattr(sample, 'name', sample.uuid),
                'uuid': sample.uuid,
                'labelset': labelset,
                'sensor': sensor_label,
                'num_frames': num_frames,
                'total_annotations': total_annotations,
                'matching_annotations': matching_annotations,
                'labeled_by': labeled_by,
                'reviewed_by': reviewed_by
            }

        def frames_of(sensor):
            return getattr(getattr(sensor, 'attributes', None), 'frames', [])

        if not is_multisensor:
            # Single-sensor dataset: frames hang directly off the label.
            return [build_row('', getattr(attributes, 'frames', []))]

        sensors = getattr(attributes, 'sensors', None) or []
        if sensor_select and sensor_select != 'All sensors':
            # Only the first sensor with the selected name is reported.
            for sensor in sensors:
                if getattr(sensor, 'name', None) == sensor_select:
                    return [build_row(sensor_select, frames_of(sensor))]
            return []

        return [
            build_row(getattr(sensor, 'name', 'Unknown'), frames_of(sensor))
            for sensor in sensors
        ]
    except Exception:
        # Best-effort: one bad sample must not abort the whole run, but the
        # failure is recorded instead of being dropped silently.
        logging.getLogger(__name__).warning(
            "Failed to compute metrics for sample %s",
            getattr(sample, 'uuid', '<unknown>'),
            exc_info=True,
        )
        return []
 def generate_csv(metrics: list, dataset_identifier: str) -> str:
     """
     Generate CSV content from list of per-sample metrics.
 if is_multisensor:
     sensor_select = st.selectbox("Choose sensor (optional)", options=['All sensors'] + sensor_names)
+# Concurrency control
+parallel_workers = st.slider("Parallel requests", min_value=1, max_value=32, value=8, help="Increase to speed up processing; lower if you hit API limits.")
 if run_button:
     st.session_state.csv_content = None
     st.session_state.error = None
             st.info("Checking dataset type...")
         try:
             target_classes = parse_classes(classes_input)
+            target_set = set(target_classes)
             metrics = []
             # Update loader after dataset type check
             if status_ctx is not None:
                 status_ctx.update(label="Dataset type checked. Processing samples...", state="running")
+            progress = st.progress(0)
+            total = len(samples_objects)
+            done = 0
+            with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_workers) as executor:
+                futures = [
+                    executor.submit(
+                        compute_metrics_for_sample,
+                        sample,
+                        api_key,
+                        target_set,
+                        is_multisensor,
+                        sensor_select,
+                    )
+                    for sample in samples_objects
+                ]
+                for future in concurrent.futures.as_completed(futures):
+                    rows = future.result()
+                    if rows:
+                        metrics.extend(rows)
+                    done += 1
+                    if total:
+                        progress.progress(min(done / total, 1.0))
             if not metrics:
                 st.session_state.error = "No metrics could be generated for the dataset."
             else:
         data=st.session_state.csv_content,
         file_name=filename,
         mime="text/csv"
+    )