|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import unittest |
|
|
|
from huggingface_hub import VideoClassificationOutputElement, hf_hub_download |
|
|
|
from transformers import MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, VideoMAEFeatureExtractor |
|
from transformers.pipelines import VideoClassificationPipeline, pipeline |
|
from transformers.testing_utils import ( |
|
compare_pipeline_output_to_hub_spec, |
|
is_pipeline_test, |
|
nested_simplify, |
|
require_av, |
|
require_tf, |
|
require_torch, |
|
require_torch_or_tf, |
|
require_vision, |
|
) |
|
|
|
from .test_pipelines_common import ANY |
|
|
|
|
|
@is_pipeline_test
@require_torch_or_tf
@require_vision
@require_av
class VideoClassificationPipelineTests(unittest.TestCase):
    """Tests for the video-classification pipeline.

    Covers both the generic pipeline-test harness (``get_test_pipeline`` /
    ``run_pipeline_test``, driven by ``model_mapping``) and an end-to-end check
    against a tiny random PyTorch VideoMAE model with pinned expected scores.
    """

    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING
    # Path to the demo clip, populated lazily by ``_load_dataset`` so the hub
    # download happens at most once per test session.
    example_video_filepath = None

    @classmethod
    def _load_dataset(cls):
        """Download (once) the example video used as pipeline input."""
        # hf_hub_download caches on disk, so repeated sessions are also cheap.
        if cls.example_video_filepath is None:
            cls.example_video_filepath = hf_hub_download(
                repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
            )

    def get_test_pipeline(
        self,
        model,
        tokenizer=None,
        image_processor=None,
        feature_extractor=None,
        processor=None,
        torch_dtype="float32",
    ):
        """Build a ``VideoClassificationPipeline`` and example inputs for the common harness.

        Returns:
            A ``(pipeline, examples)`` tuple; ``examples`` is a list of local
            video file paths the harness feeds to the pipeline.
        """
        self._load_dataset()
        video_classifier = VideoClassificationPipeline(
            model=model,
            tokenizer=tokenizer,
            feature_extractor=feature_extractor,
            image_processor=image_processor,
            processor=processor,
            torch_dtype=torch_dtype,
            top_k=2,  # fixed top_k so run_pipeline_test can assert an exact output shape
        )
        examples = [
            self.example_video_filepath,
        ]
        return video_classifier, examples

    def run_pipeline_test(self, video_classifier, examples):
        """Run the pipeline on each example and validate the output structure.

        Only the shape/types of the output are checked here (scores depend on
        the randomly-initialized model supplied by the harness).
        """
        for example in examples:
            outputs = video_classifier(example)

            # top_k=2 in get_test_pipeline -> exactly two {score, label} dicts.
            self.assertEqual(
                outputs,
                [
                    {"score": ANY(float), "label": ANY(str)},
                    {"score": ANY(float), "label": ANY(str)},
                ],
            )
            # Each element must also conform to the hub's output spec.
            for element in outputs:
                compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

    @require_torch
    def test_small_model_pt(self):
        """End-to-end check of a tiny PyTorch VideoMAE model with pinned scores."""
        small_model = "hf-internal-testing/tiny-random-VideoMAEForVideoClassification"
        # Tiny processing sizes keep the test fast; frame_sampling_rate=4 limits
        # how many frames are decoded from the clip.
        small_feature_extractor = VideoMAEFeatureExtractor(
            size={"shortest_edge": 10}, crop_size={"height": 10, "width": 10}
        )
        video_classifier = pipeline(
            "video-classification", model=small_model, feature_extractor=small_feature_extractor, frame_sampling_rate=4
        )

        video_file_path = hf_hub_download(repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset")

        # Single-video input -> a flat list of {score, label} dicts.
        output = video_classifier(video_file_path, top_k=2)
        self.assertEqual(
            nested_simplify(output, decimals=4),
            [{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
        )
        for element in output:
            compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

        # Batched input -> one result list per video.
        outputs = video_classifier(
            [
                video_file_path,
                video_file_path,
            ],
            top_k=2,
        )
        self.assertEqual(
            nested_simplify(outputs, decimals=4),
            [
                [{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
                [{"score": 0.5199, "label": "LABEL_0"}, {"score": 0.4801, "label": "LABEL_1"}],
            ],
        )
        for output in outputs:
            for element in output:
                compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement)

    @require_tf
    # Bare `@unittest.skip` only works as a no-argument decorator on Python >= 3.11;
    # an explicit reason keeps the skip correct on all supported interpreters.
    @unittest.skip(reason="Video classification is not implemented for TF")
    def test_small_model_tf(self):
        pass
|
|