Skip to content

Commit

Permalink
Move info_utils errors to exceptions module (#6952)
Browse files Browse the repository at this point in the history
* Move info_utils errors to exceptions module

* Create new errors and deprecate old ones

* Replace deprecated errors

* Make deprecation backward compatible

* Test deprecated and non-deprecated errors

* Fix non-deprecated errors
  • Loading branch information
albertvillanova authored Jun 10, 2024
1 parent 9510252 commit 37a6036
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 36 deletions.
7 changes: 7 additions & 0 deletions src/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@
# Deprecated modules
from . import arrow_dataset as _arrow_dataset
from . import utils as _utils
from .exceptions import ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits
from .utils import download_manager as _deprecated_download_manager
from .utils import info_utils as _deprecated_info_utils


_arrow_dataset.concatenate_datasets = concatenate_datasets
Expand All @@ -68,5 +70,10 @@
_deprecated_download_manager.DownloadConfig = DownloadConfig
_deprecated_download_manager.DownloadMode = DownloadMode
_deprecated_download_manager.DownloadManager = DownloadManager
# Backward compatibility: these error classes now live in `datasets.exceptions`.
# Attach the old names to `utils.info_utils` so attribute access and
# `from datasets.utils.info_utils import ...` with the old names keep resolving.
_deprecated_info_utils.ExpectedMoreDownloadedFiles = ExpectedMoreDownloadedFiles
_deprecated_info_utils.ExpectedMoreSplits = ExpectedMoreSplits
_deprecated_info_utils.UnexpectedDownloadedFile = UnexpectedDownloadedFile
_deprecated_info_utils.UnexpectedSplits = UnexpectedSplits

# Remove the private module aliases from this package's namespace.
del _arrow_dataset, _utils, _deprecated_download_manager
# Likewise drop the deprecated error names imported above: they were only needed
# for the re-export and are not meant to be top-level attributes of this package.
del _deprecated_info_utils, ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits
111 changes: 111 additions & 0 deletions src/datasets/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from . import config
from .table import CastError
from .utils.deprecation_utils import deprecated
from .utils.track import TrackedIterable, tracked_list, tracked_str


Expand Down Expand Up @@ -83,3 +84,113 @@ def from_cast_error(
explanation_message += f"\n\nThis happened while the {builder_name} dataset builder was generating data using\n\n{', '.join(formatted_tracked_gen_kwargs)}"
help_message = "\n\nPlease either edit the data files to have matching columns, or separate them into different configurations (see docs at https://hf.co/docs/hub/datasets-manual-configuration#multiple-configurations)"
return cls("An error occurred while generating the dataset" + explanation_message + help_message)


@deprecated("Use 'ChecksumVerificationError' instead.")
class ChecksumVerificationException(Exception):
    """Exceptions during checksums verifications of downloaded files.

    Kept only as a deprecated base class so that existing
    `except ChecksumVerificationException` handlers continue to work.

    <Deprecated version="2.20.0">

    Use `ChecksumVerificationError` instead.

    </Deprecated>
    """


class ChecksumVerificationError(DatasetsError, ChecksumVerificationException):
    """Error raised during checksums verifications of downloaded files.

    Also derives from the deprecated `ChecksumVerificationException`, so code
    that still catches the old exception type keeps catching this error.
    """

    def __init__(self, *args, **kwargs):
        # Initialise through `DatasetsError` explicitly rather than inheriting
        # `__init__` via the MRO. The tests added with this change require that
        # creating this error emits no warning, while creating the deprecated
        # base does.
        # NOTE(review): presumably `@deprecated` wraps the base class's
        # `__init__` to emit the warning - confirm in `deprecation_utils`.
        DatasetsError.__init__(self, *args, **kwargs)


@deprecated("Use 'UnexpectedDownloadedFileError' instead.")
class UnexpectedDownloadedFile(ChecksumVerificationException):
    """Some downloaded files were not expected.

    <Deprecated version="2.20.0">

    Use `UnexpectedDownloadedFileError` instead.

    </Deprecated>
    """


class UnexpectedDownloadedFileError(ChecksumVerificationError, UnexpectedDownloadedFile):
    """Some downloaded files were not expected.

    Also derives from the deprecated `UnexpectedDownloadedFile`, so handlers
    written against the old exception type still catch this error.
    """


@deprecated("Use 'ExpectedMoreDownloadedFilesError' instead.")
class ExpectedMoreDownloadedFiles(ChecksumVerificationException):
    """Some files were supposed to be downloaded but were not.

    <Deprecated version="2.20.0">

    Use `ExpectedMoreDownloadedFilesError` instead.

    </Deprecated>
    """


class ExpectedMoreDownloadedFilesError(ChecksumVerificationError, ExpectedMoreDownloadedFiles):
    """Some files were supposed to be downloaded but were not.

    Also derives from the deprecated `ExpectedMoreDownloadedFiles`, so handlers
    written against the old exception type still catch this error.
    """


class NonMatchingChecksumError(ChecksumVerificationError):
    """The checksum of a downloaded file doesn't match the expected checksum."""


@deprecated("Use 'SplitsVerificationError' instead.")
class SplitsVerificationException(Exception):
    """Exceptions during splits verifications.

    Kept only as a deprecated base class so that existing
    `except SplitsVerificationException` handlers continue to work.

    <Deprecated version="2.20.0">

    Use `SplitsVerificationError` instead.

    </Deprecated>
    """


class SplitsVerificationError(DatasetsError, SplitsVerificationException):
    """Error raised during splits verifications.

    Also derives from the deprecated `SplitsVerificationException`, so code
    that still catches the old exception type keeps catching this error.
    """

    def __init__(self, *args, **kwargs):
        # Initialise through `DatasetsError` explicitly rather than inheriting
        # `__init__` via the MRO. The tests added with this change require that
        # creating this error emits no warning, while creating the deprecated
        # base does.
        # NOTE(review): presumably `@deprecated` wraps the base class's
        # `__init__` to emit the warning - confirm in `deprecation_utils`.
        DatasetsError.__init__(self, *args, **kwargs)


@deprecated("Use 'UnexpectedSplitsError' instead.")
class UnexpectedSplits(SplitsVerificationException):
    """Some recorded splits were not among the expected splits.

    <Deprecated version="2.20.0">

    Use `UnexpectedSplitsError` instead.

    </Deprecated>
    """


class UnexpectedSplitsError(SplitsVerificationError, UnexpectedSplits):
    """Some recorded splits were not among the expected splits.

    Also derives from the deprecated `UnexpectedSplits`, so handlers written
    against the old exception type still catch this error.
    """


@deprecated("Use 'ExpectedMoreSplitsError' instead.")
class ExpectedMoreSplits(SplitsVerificationException):
    """Some expected splits are missing from the recorded splits.

    <Deprecated version="2.20.0">

    Use `ExpectedMoreSplitsError` instead.

    </Deprecated>
    """


class ExpectedMoreSplitsError(SplitsVerificationError, ExpectedMoreSplits):
    """Some expected splits are missing from the recorded splits.

    Also derives from the deprecated `ExpectedMoreSplits`, so handlers written
    against the old exception type still catch this error.
    """


class NonMatchingSplitsSizesError(SplitsVerificationError):
    """The recorded split sizes don't match the expected split sizes."""
48 changes: 12 additions & 36 deletions src/datasets/utils/info_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
from huggingface_hub.utils import insecure_hashlib

from .. import config
from ..exceptions import (
ExpectedMoreDownloadedFilesError,
ExpectedMoreSplitsError,
NonMatchingChecksumError,
NonMatchingSplitsSizesError,
UnexpectedDownloadedFileError,
UnexpectedSplitsError,
)
from .logging import get_logger


Expand Down Expand Up @@ -33,30 +41,14 @@ class VerificationMode(enum.Enum):
NO_CHECKS = "no_checks"


class ChecksumVerificationException(Exception):
    """Exceptions during checksums verifications of downloaded files."""


class UnexpectedDownloadedFile(ChecksumVerificationException):
    """Some downloaded files were not expected."""


class ExpectedMoreDownloadedFiles(ChecksumVerificationException):
    """Some files were supposed to be downloaded but were not."""


class NonMatchingChecksumError(ChecksumVerificationException):
    """The checksum of a downloaded file doesn't match the expected checksum."""


def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dict, verification_name=None):
if expected_checksums is None:
logger.info("Unable to verify checksums.")
return
if len(set(expected_checksums) - set(recorded_checksums)) > 0:
raise ExpectedMoreDownloadedFiles(str(set(expected_checksums) - set(recorded_checksums)))
raise ExpectedMoreDownloadedFilesError(str(set(expected_checksums) - set(recorded_checksums)))
if len(set(recorded_checksums) - set(expected_checksums)) > 0:
raise UnexpectedDownloadedFile(str(set(recorded_checksums) - set(expected_checksums)))
raise UnexpectedDownloadedFileError(str(set(recorded_checksums) - set(expected_checksums)))
bad_urls = [url for url in expected_checksums if expected_checksums[url] != recorded_checksums[url]]
for_verification_name = " for " + verification_name if verification_name is not None else ""
if len(bad_urls) > 0:
Expand All @@ -68,30 +60,14 @@ def verify_checksums(expected_checksums: Optional[dict], recorded_checksums: dic
logger.info("All the checksums matched successfully" + for_verification_name)


class SplitsVerificationException(Exception):
    """Exceptions during splits verifications."""


class UnexpectedSplits(SplitsVerificationException):
    """Some recorded splits were not among the expected splits."""


class ExpectedMoreSplits(SplitsVerificationException):
    """Some expected splits are missing from the recorded splits."""


class NonMatchingSplitsSizesError(SplitsVerificationException):
    """The recorded split sizes don't match the expected split sizes."""


def verify_splits(expected_splits: Optional[dict], recorded_splits: dict):
if expected_splits is None:
logger.info("Unable to verify splits sizes.")
return
if len(set(expected_splits) - set(recorded_splits)) > 0:
raise ExpectedMoreSplits(str(set(expected_splits) - set(recorded_splits)))
raise ExpectedMoreSplitsError(str(set(expected_splits) - set(recorded_splits)))
if len(set(recorded_splits) - set(expected_splits)) > 0:
raise UnexpectedSplits(str(set(recorded_splits) - set(expected_splits)))
raise UnexpectedSplitsError(str(set(recorded_splits) - set(expected_splits)))
bad_splits = [
{"expected": expected_splits[name], "recorded": recorded_splits[name]}
for name in expected_splits
Expand Down
58 changes: 58 additions & 0 deletions tests/test_exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import warnings

import pytest

import datasets.utils.deprecation_utils
from datasets.exceptions import (
ChecksumVerificationError,
ChecksumVerificationException,
ExpectedMoreDownloadedFiles,
ExpectedMoreDownloadedFilesError,
ExpectedMoreSplits,
ExpectedMoreSplitsError,
NonMatchingChecksumError,
NonMatchingSplitsSizesError,
SplitsVerificationError,
SplitsVerificationException,
UnexpectedDownloadedFile,
UnexpectedDownloadedFileError,
UnexpectedSplits,
UnexpectedSplitsError,
)


@pytest.mark.parametrize(
    "error",
    [
        ChecksumVerificationException,
        UnexpectedDownloadedFile,
        ExpectedMoreDownloadedFiles,
        SplitsVerificationException,
        UnexpectedSplits,
        ExpectedMoreSplits,
    ],
)
def test_error_deprecated(error, monkeypatch):
    """Instantiating a deprecated error class must emit a deprecation warning."""
    # Start every case from an empty record of already-emitted warnings.
    # NOTE(review): presumably the deprecation helper warns only once per
    # object and tracks that in this set - confirm in `deprecation_utils`.
    monkeypatch.setattr(datasets.utils.deprecation_utils, "_emitted_deprecation_warnings", set())
    with pytest.deprecated_call():
        error()


@pytest.mark.parametrize(
    "error",
    [
        ChecksumVerificationError,
        UnexpectedDownloadedFileError,
        ExpectedMoreDownloadedFilesError,
        NonMatchingChecksumError,
        SplitsVerificationError,
        UnexpectedSplitsError,
        ExpectedMoreSplitsError,
        NonMatchingSplitsSizesError,
    ],
)
def test_error_not_deprecated(error, monkeypatch):
    """Instantiating a new-style error class must not emit any warning."""
    # Start every case from an empty record of already-emitted warnings, so a
    # warning cannot be hidden by having been emitted earlier in the session.
    # NOTE(review): presumably the deprecation helper warns only once per
    # object and tracks that in this set - confirm in `deprecation_utils`.
    monkeypatch.setattr(datasets.utils.deprecation_utils, "_emitted_deprecation_warnings", set())
    with warnings.catch_warnings():
        # Escalate every warning to an exception: any warning raised while
        # constructing the error fails the test.
        warnings.simplefilter("error")
        error()

0 comments on commit 37a6036

Please sign in to comment.