From ec2bc84e16763ce06429040728d2a57b638938b2 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Wed, 24 Jul 2024 10:44:03 +0200
Subject: [PATCH] Add tests

---
Reviewer notes (not part of the commit message; text between the "---"
marker and the first "diff --git" line is ignored when the patch is applied):

* Imports `_prepare_single_hop_path_and_storage_options` from
  `datasets.utils.file_utils` into the test module.
* Drops the no-op `assert True` from `test_cached_path_protocols`.
* Adds the parametrized `test_prepare_single_hop_path_and_storage_options`
  (seven cases). Each case calls the helper with a URL and a `DownloadConfig`
  and compares the returned (path, storage options) pair with the expectation:
    * Hub `.../resolve/main/...` https URLs are expected to come back as
      `hf://datasets/<repo>@main/<file>` paths, with `endpoint` and `token`
      under the "hf" key; user-supplied "hf" options are expected to be kept
      next to them.
    * Other https URLs are expected to come back unchanged, with
      `client_kwargs.trust_env` set to True unless the caller overrides it;
      user-supplied "https" options are expected to be merged in.

 tests/test_file_utils.py | 57 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
index 201ae574442..8b3cd9d9dc5 100644
--- a/tests/test_file_utils.py
+++ b/tests/test_file_utils.py
@@ -12,6 +12,7 @@
 from datasets.utils.file_utils import (
     OfflineModeIsEnabled,
     _get_extraction_protocol,
+    _prepare_single_hop_path_and_storage_options,
     cached_path,
     fsspec_get,
     fsspec_head,
@@ -90,7 +91,6 @@ def test_cached_path_protocols(protocol, monkeypatch, tmp_path):
     urls = {"hf": "hf://datasets/org-name/ds-name@main/filename.ext", "s3": "s3://bucket-name/filename.ext"}
     url = urls[protocol]
     _ = cached_path(url, download_config=download_config)
-    assert True
     for mock in [mock_fsspec_head, mock_fsspec_get]:
         assert mock.called
         assert mock.call_count == 1
@@ -197,6 +197,61 @@ def test_fsspec_offline(tmp_path_factory):
         fsspec_head("s3://huggingface.co")
 
 
+@pytest.mark.parametrize(
+    "urlpath, download_config, expected_urlpath, expected_storage_options",
+    [
+        (
+            "https://huggingface.co/datasets/hf-internal-testing/dataset_with_script/resolve/main/some_text.txt",
+            DownloadConfig(),
+            "hf://datasets/hf-internal-testing/dataset_with_script@main/some_text.txt",
+            {"hf": {"endpoint": "https://huggingface.co", "token": None}},
+        ),
+        (
+            "https://huggingface.co/datasets/hf-internal-testing/dataset_with_script/resolve/main/some_text.txt",
+            DownloadConfig(token="MY-TOKEN"),
+            "hf://datasets/hf-internal-testing/dataset_with_script@main/some_text.txt",
+            {"hf": {"endpoint": "https://huggingface.co", "token": "MY-TOKEN"}},
+        ),
+        (
+            "https://huggingface.co/datasets/hf-internal-testing/dataset_with_script/resolve/main/some_text.txt",
+            DownloadConfig(token="MY-TOKEN", storage_options={"hf": {"on_error": "omit"}}),
+            "hf://datasets/hf-internal-testing/dataset_with_script@main/some_text.txt",
+            {"hf": {"endpoint": "https://huggingface.co", "token": "MY-TOKEN", "on_error": "omit"}},
+        ),
+        (
+            "https://domain.org/data.txt",
+            DownloadConfig(),
+            "https://domain.org/data.txt",
+            {"https": {"client_kwargs": {"trust_env": True}}},
+        ),
+        (
+            "https://domain.org/data.txt",
+            DownloadConfig(storage_options={"https": {"block_size": "omit"}}),
+            "https://domain.org/data.txt",
+            {"https": {"client_kwargs": {"trust_env": True}, "block_size": "omit"}},
+        ),
+        (
+            "https://domain.org/data.txt",
+            DownloadConfig(storage_options={"https": {"client_kwargs": {"raise_for_status": True}}}),
+            "https://domain.org/data.txt",
+            {"https": {"client_kwargs": {"trust_env": True, "raise_for_status": True}}},
+        ),
+        (
+            "https://domain.org/data.txt",
+            DownloadConfig(storage_options={"https": {"client_kwargs": {"trust_env": False}}}),
+            "https://domain.org/data.txt",
+            {"https": {"client_kwargs": {"trust_env": False}}},
+        ),
+    ],
+)
+def test_prepare_single_hop_path_and_storage_options(
+    urlpath, download_config, expected_urlpath, expected_storage_options
+):
+    prepared_urlpath, storage_options = _prepare_single_hop_path_and_storage_options(urlpath, download_config)
+    assert prepared_urlpath == expected_urlpath
+    assert storage_options == expected_storage_options
+
+
 class DummyTestFS(AbstractFileSystem):
     protocol = "mock"
     _file_class = AbstractBufferedFile