Skip to content

Commit

Permalink
Remove deprecated DownloadManager.download_custom
Browse files (browse the repository at this point in the history)
  • Loading branch information
albertvillanova committed Jun 25, 2024
1 parent e8d56fd commit ef4396a
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 54 deletions.
51 changes: 1 addition & 50 deletions src/datasets/download/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@

from .. import config
from ..utils import tqdm as hf_tqdm
from ..utils.deprecation_utils import DeprecatedEnum, deprecated
from ..utils.deprecation_utils import DeprecatedEnum
from ..utils.file_utils import (
ArchiveIterable,
FilesIterable,
cached_path,
get_from_cache,
hash_url_to_filename,
is_relative_path,
stack_multiprocessing_download_progress_bars,
url_or_path_join,
Expand Down Expand Up @@ -179,53 +177,6 @@ def _record_sizes_checksums(self, url_or_urls: NestedDataStructure, downloaded_p
path, record_checksum=self.record_checksums
)

@deprecated("Use `.download`/`.download_and_extract` with `fsspec` URLs instead.")
def download_custom(self, url_or_urls, custom_download):
    """
    Download the given URL(s) by delegating each transfer to `custom_download`.

    Every URL is mapped to a destination path inside the cache directory
    (`download_config.cache_dir`, falling back to `config.DOWNLOADED_DATASETS_PATH`).
    `custom_download` is only invoked for a URL when the local cache lookup raises
    `FileNotFoundError` or when `download_config.force_download` is set.

    Args:
        url_or_urls (`str` or `list` or `dict`):
            URL or `list` or `dict` of URLs to download. Each URL is a `str`.
        custom_download (`Callable[src_url, dst_path]`):
            Callable receiving the source URL and the destination path. For example
            `tf.io.gfile.copy`, that lets you download from Google storage.

    Returns:
        downloaded_path(s): `str`, The downloaded paths matching the given input
            `url_or_urls`.

    Example:

    ```py
    >>> downloaded_files = dl_manager.download_custom('s3://my-bucket/data.zip', custom_download_for_my_private_bucket)
    ```
    """
    cache_dir = self.download_config.cache_dir or config.DOWNLOADED_DATASETS_PATH
    # Keyword arguments shared by every local-only cache lookup below.
    lookup_kwargs = {
        "cache_dir": cache_dir,
        "local_files_only": True,
        "use_etag": False,
        "max_retries": self.download_config.max_retries,
    }

    def is_cached(url):
        # A local-only lookup signals a cache miss with FileNotFoundError.
        try:
            get_from_cache(url, **lookup_kwargs)
        except FileNotFoundError:
            return False
        return True

    # Build the destination paths with the same nesting as the input URLs.
    target_paths = map_nested(
        lambda url: os.path.join(cache_dir, hash_url_to_filename(url)),
        url_or_urls,
    )
    nested_urls = NestedDataStructure(url_or_urls)
    nested_paths = NestedDataStructure(target_paths)

    for url, path in zip(nested_urls.flatten(), nested_paths.flatten()):
        if is_cached(url) and not self.download_config.force_download:
            continue
        custom_download(url, path)
        # Repeat the local-only lookup after the download; it raises if the
        # file still cannot be found in the cache.
        get_from_cache(url, **lookup_kwargs)

    self._record_sizes_checksums(nested_urls, nested_paths)
    return nested_paths.data

def download(self, url_or_urls):
"""Download given URL(s).
Expand Down
4 changes: 0 additions & 4 deletions src/datasets/download/mock_download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,6 @@ def download_and_extract(self, data_url, *args):
def download(self, data_url, *args):
    """Return the result of `download_and_extract` for `data_url`.

    Any extra positional arguments are accepted but not forwarded.
    """
    result = self.download_and_extract(data_url)
    return result

# this function has to be in the manager under this name so that testing works
def download_custom(self, data_url, custom_download):
    """Return the result of `download_and_extract` for `data_url`.

    `custom_download` is accepted for signature compatibility and is never called.
    """
    result = self.download_and_extract(data_url)
    return result

# this function has to be in the manager under this name so that testing works
def extract(self, path, *args, **kwargs):
    """Return `path` unchanged.

    Extra positional and keyword arguments are accepted and ignored.
    """
    unchanged_path = path
    return unchanged_path
Expand Down

0 comments on commit ef4396a

Please sign in to comment.