From ef4396af6c95bbd203e161fa4349a659742e3655 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Tue, 25 Jun 2024 08:57:04 +0200
Subject: [PATCH] Remove deprecated DownloadManager.download_custom

---
 src/datasets/download/download_manager.py      | 51 +------------------
 .../download/mock_download_manager.py          |  4 --
 2 files changed, 1 insertion(+), 54 deletions(-)

diff --git a/src/datasets/download/download_manager.py b/src/datasets/download/download_manager.py
index b6c45d94c15..2c21b97872a 100644
--- a/src/datasets/download/download_manager.py
+++ b/src/datasets/download/download_manager.py
@@ -31,13 +31,11 @@
 from .. import config
 from ..utils import tqdm as hf_tqdm
-from ..utils.deprecation_utils import DeprecatedEnum, deprecated
+from ..utils.deprecation_utils import DeprecatedEnum
 from ..utils.file_utils import (
     ArchiveIterable,
     FilesIterable,
     cached_path,
-    get_from_cache,
-    hash_url_to_filename,
     is_relative_path,
     stack_multiprocessing_download_progress_bars,
     url_or_path_join,
@@ -179,53 +177,6 @@ def _record_sizes_checksums(self, url_or_urls: NestedDataStructure, downloaded_p
             path, record_checksum=self.record_checksums
         )
 
-    @deprecated("Use `.download`/`.download_and_extract` with `fsspec` URLs instead.")
-    def download_custom(self, url_or_urls, custom_download):
-        """
-        Download given urls(s) by calling `custom_download`.
-
-        Args:
-            url_or_urls (`str` or `list` or `dict`):
-                URL or `list` or `dict` of URLs to download and extract. Each URL is a `str`.
-            custom_download (`Callable[src_url, dst_path]`):
-                The source URL and destination path. For example
-                `tf.io.gfile.copy`, that lets you download from Google storage.
-
-        Returns:
-            downloaded_path(s): `str`, The downloaded paths matching the given input
-                `url_or_urls`.
-
-        Example:
-
-        ```py
-        >>> downloaded_files = dl_manager.download_custom('s3://my-bucket/data.zip', custom_download_for_my_private_bucket)
-        ```
-        """
-        cache_dir = self.download_config.cache_dir or config.DOWNLOADED_DATASETS_PATH
-        max_retries = self.download_config.max_retries
-
-        def url_to_downloaded_path(url):
-            return os.path.join(cache_dir, hash_url_to_filename(url))
-
-        downloaded_path_or_paths = map_nested(url_to_downloaded_path, url_or_urls)
-        url_or_urls = NestedDataStructure(url_or_urls)
-        downloaded_path_or_paths = NestedDataStructure(downloaded_path_or_paths)
-        for url, path in zip(url_or_urls.flatten(), downloaded_path_or_paths.flatten()):
-            try:
-                get_from_cache(
-                    url, cache_dir=cache_dir, local_files_only=True, use_etag=False, max_retries=max_retries
-                )
-                cached = True
-            except FileNotFoundError:
-                cached = False
-            if not cached or self.download_config.force_download:
-                custom_download(url, path)
-                get_from_cache(
-                    url, cache_dir=cache_dir, local_files_only=True, use_etag=False, max_retries=max_retries
-                )
-        self._record_sizes_checksums(url_or_urls, downloaded_path_or_paths)
-        return downloaded_path_or_paths.data
-
     def download(self, url_or_urls):
         """Download given URL(s).
diff --git a/src/datasets/download/mock_download_manager.py b/src/datasets/download/mock_download_manager.py
index 7c71103a536..e7313610f78 100644
--- a/src/datasets/download/mock_download_manager.py
+++ b/src/datasets/download/mock_download_manager.py
@@ -130,10 +130,6 @@ def download_and_extract(self, data_url, *args):
     def download(self, data_url, *args):
         return self.download_and_extract(data_url)
 
-    # this function has to be in the manager under this name so that testing works
-    def download_custom(self, data_url, custom_download):
-        return self.download_and_extract(data_url)
-
     # this function has to be in the manager under this name so that testing works
     def extract(self, path, *args, **kwargs):
         return path
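
Note (not part of the patch): the removed method's deprecation message pointed callers at `.download`/`.download_and_extract` with `fsspec` URLs. Below is a minimal migration sketch under stated assumptions: it assumes `s3fs` is installed so fsspec can resolve `s3://` URLs and that credentials are passed through `DownloadConfig.storage_options`; the exact shape of those options and the bucket URL are illustrative, not something this patch documents.

```python
# Hypothetical migration sketch: replacing the removed `download_custom` with the
# fsspec-based `download`, per the old deprecation message
# "Use `.download`/`.download_and_extract` with `fsspec` URLs instead."
# Assumptions: `s3fs` is installed; credential keys shown are placeholders.
from datasets import DownloadConfig, DownloadManager

# Before (removed in this patch): a user callable copied each source URL to a local path.
#   dl_manager.download_custom("s3://my-bucket/data.zip", custom_download_for_my_private_bucket)

# After: hand fsspec the credentials via DownloadConfig and download the URL directly.
download_config = DownloadConfig(
    storage_options={"key": "<aws-access-key>", "secret": "<aws-secret-key>"}  # assumed s3fs options
)
dl_manager = DownloadManager(dataset_name="my_dataset", download_config=download_config)
local_path = dl_manager.download("s3://my-bucket/data.zip")  # cached local copy, like any other URL
```

The upside of this path is that caching, checksum recording, and progress bars go through the same `download` code as HTTP URLs, which is why the bespoke `download_custom` caching loop above could be dropped.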