Skip to content

Commit

Permalink
Fix expected splits when passing `data_files` or `data_dir`
Browse files Browse the repository at this point in the history
  • Loading branch information
lhoestq committed May 31, 2024
1 parent 456f790 commit 69a3566
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,8 @@ def load_dataset_builder(
)
dataset_name = builder_kwargs.pop("dataset_name", None)
info = dataset_module.dataset_infos.get(config_name) if dataset_module.dataset_infos else None
if data_dir or data_files:
info.splits = None

if (
path in _PACKAGED_DATASETS_MODULES
Expand Down
13 changes: 11 additions & 2 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,15 +1210,24 @@ def test_load_dataset_builder_for_community_dataset_with_script_no_parquet_expor


@pytest.mark.integration
def test_load_dataset_builder_use_parquet_export_if_dont_trust_remote_code_keeps_features():
def test_load_dataset_builder_load_features_and_splits_info():
dataset_name = "food101"
builder = datasets.load_dataset_builder(dataset_name, trust_remote_code=False)
builder = datasets.load_dataset_builder(dataset_name)
assert isinstance(builder, DatasetBuilder)
assert builder.name == "parquet"
assert builder.dataset_name == dataset_name
assert builder.config.name == "default"
assert list(builder.info.features) == ["image", "label"]
assert builder.info.features["image"] == Image()
assert builder.info.splits is not None
builder = datasets.load_dataset_builder(dataset_name, data_files="data/validation-00000-of-00003.parquet")
assert isinstance(builder, DatasetBuilder)
assert builder.name == "parquet"
assert builder.dataset_name == dataset_name
assert builder.config.name == "default"
assert list(builder.info.features) == ["image", "label"]
assert builder.info.features["image"] == Image()
assert builder.info.splits is None


@pytest.mark.integration
Expand Down

0 comments on commit 69a3566

Please sign in to comment.