-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
221 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from typing import List | ||
|
||
import datasets | ||
|
||
from ..folder_based_builder import folder_based_builder | ||
|
||
|
||
logger = datasets.utils.logging.get_logger(__name__) | ||
|
||
|
||
class VideoFolderConfig(folder_based_builder.FolderBasedBuilderConfig):
    """BuilderConfig for VideoFolder."""

    # Both options default to None (i.e. unset) rather than True/False.
    # NOTE(review): presumably the base folder-based builder treats None as
    # "decide automatically" -- confirm against FolderBasedBuilderConfig.
    drop_labels: bool = None
    drop_metadata: bool = None

    def __post_init__(self):
        # No VideoFolder-specific validation: defer entirely to the base config.
        super().__post_init__()
|
||
|
||
class VideoFolder(folder_based_builder.FolderBasedBuilder):
    """Folder-based dataset builder configured for video files.

    The class body only sets class-level attributes; it defines no methods of
    its own, so all behavior comes from ``FolderBasedBuilder``.
    """

    BASE_FEATURE = datasets.Video
    BASE_COLUMN_NAME = "video"
    BUILDER_CONFIG_CLASS = VideoFolderConfig
    # Annotation only: the value is assigned after the class body, at the
    # bottom of the script (``VideoFolder.EXTENSIONS = VIDEO_EXTENSIONS``).
    EXTENSIONS: List[str]
|
||
|
||
# File extensions (leading dot, lowercase) exposed as VideoFolder.EXTENSIONS.
# TODO: initial list, we should check the compatibility of other formats
VIDEO_EXTENSIONS = [
    ".mkv",
    ".mp4",
    ".avi",
    ".mpeg",
    ".mov",
]
# Fill in the attribute that the VideoFolder class body only annotates.
VideoFolder.EXTENSIONS = VIDEO_EXTENSIONS
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import pytest | ||
|
||
from datasets import Dataset, Features, Video | ||
|
||
from ..utils import require_decord | ||
|
||
|
||
def _read_video_bytes(video_path):
    """Return the raw bytes of the file at ``video_path``.

    Uses a context manager so the file handle is closed deterministically
    instead of being left for the garbage collector (which emits a
    ResourceWarning under pytest).
    """
    with open(video_path, "rb") as f:
        return f.read()


@require_decord
@pytest.mark.parametrize(
    "build_example",
    [
        # Each builder turns a file path into one accepted input form for
        # ``Video.encode_example``: a bare path, raw bytes, or a dict carrying
        # "path" and/or "bytes".
        lambda video_path: video_path,
        lambda video_path: _read_video_bytes(video_path),
        lambda video_path: {"path": video_path},
        lambda video_path: {"path": video_path, "bytes": None},
        lambda video_path: {"path": video_path, "bytes": _read_video_bytes(video_path)},
        lambda video_path: {"path": None, "bytes": _read_video_bytes(video_path)},
        lambda video_path: {"bytes": _read_video_bytes(video_path)},
    ],
)
def test_video_feature_encode_example(shared_datadir, build_example):
    """Check that every accepted input form encodes to a dict and decodes to a VideoReader.

    Args:
        shared_datadir: fixture providing the directory holding the test video.
        build_example: callable mapping the video path to the example to encode.
    """
    # Imported lazily so the module can be collected without decord installed;
    # the test itself is gated by ``require_decord``.
    from decord import VideoReader

    video_path = str(shared_datadir / "test_video_66x50.mov")
    video = Video()
    encoded_example = video.encode_example(build_example(video_path))
    assert isinstance(encoded_example, dict)
    assert encoded_example.keys() == {"bytes", "path"}
    # At least one of the two storage fields must be populated.
    assert encoded_example["bytes"] is not None or encoded_example["path"] is not None
    decoded_example = video.decode_example(encoded_example)
    assert isinstance(decoded_example, VideoReader)
|
||
|
||
@require_decord
def test_dataset_with_video_feature(shared_datadir):
    """Access a Video column by row, by slice and by column name, from a path and from bytes."""
    from decord import VideoReader
    from decord.ndarray import NDArray

    frame_shape = (50, 66, 3)

    def assert_decoded(reader):
        # The decoded value is a VideoReader whose first frame is an NDArray
        # of the expected shape.
        assert isinstance(reader, VideoReader)
        assert reader[0].shape == frame_shape
        assert isinstance(reader[0], NDArray)

    video_path = str(shared_datadir / "test_video_66x50.mov")
    features = Features({"video": Video()})

    # from path
    dset = Dataset.from_dict({"video": [video_path]}, features=features)

    row = dset[0]
    assert row.keys() == {"video"}
    assert_decoded(row["video"])

    batch = dset[:1]
    assert len(batch) == 1
    assert batch.keys() == {"video"}
    batch_videos = batch["video"]
    assert isinstance(batch_videos, list) and all(isinstance(reader, VideoReader) for reader in batch_videos)
    assert batch["video"][0][0].shape == frame_shape
    assert isinstance(batch["video"][0][0], NDArray)

    column = dset["video"]
    assert len(column) == 1
    assert isinstance(column, list) and all(isinstance(reader, VideoReader) for reader in column)
    assert column[0][0].shape == frame_shape
    assert isinstance(column[0][0], NDArray)

    # from bytes
    with open(video_path, "rb") as f:
        bytes_data = {"video": [f.read()]}
    dset = Dataset.from_dict(bytes_data, features=features)
    row = dset[0]
    assert row.keys() == {"video"}
    assert_decoded(row["video"])
|
||
|
||
@require_decord
def test_dataset_with_video_map_and_formatted(shared_datadir):
    """After an identity ``map`` and ``with_format("numpy")``, videos still decode to a VideoReader."""
    import numpy as np
    from decord import VideoReader

    video_path = str(shared_datadir / "test_video_66x50.mov")
    features = Features({"video": Video()})

    def assert_numpy_frames(raw_data):
        # Build the dataset, run an identity map, switch to numpy formatting,
        # then check the first example: reader type stays VideoReader while
        # its frames come back as numpy arrays.
        formatted = Dataset.from_dict(raw_data, features=features)
        formatted = formatted.map(lambda x: x).with_format("numpy")
        first = formatted[0]
        assert isinstance(first["video"], VideoReader)
        assert isinstance(first["video"][0], np.ndarray)

    # from path
    assert_numpy_frames({"video": [video_path]})

    # from bytes
    with open(video_path, "rb") as f:
        bytes_data = {"video": [f.read()]}
    assert_numpy_frames(bytes_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters