diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml
index 4c62ea5c6a5..d8f11e3c10b 100644
--- a/.github/conda/meta.yaml
+++ b/.github/conda/meta.yaml
@@ -24,7 +24,7 @@ requirements:
     - dataclasses
     - multiprocess
     - fsspec
-    - huggingface_hub >=0.22.0,<1.0.0
+    - huggingface_hub >=0.23.0,<1.0.0
     - packaging
     - aiohttp
   run:
@@ -41,7 +41,7 @@ requirements:
     - dataclasses
     - multiprocess
     - fsspec
-    - huggingface_hub >=0.22.0,<1.0.0
+    - huggingface_hub >=0.23.0,<1.0.0
     - packaging
     - aiohttp
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 383c96332c8..2951be28289 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -62,7 +62,7 @@ jobs:
         run: uv pip install --system --upgrade pyarrow huggingface-hub "dill<0.3.9"
       - name: Install dependencies (minimum versions)
         if: ${{ matrix.deps_versions != 'deps-latest' }}
-        run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.22.0 transformers dill==0.3.1.1
+        run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.23.5 transformers dill==0.3.1.1
       - name: Test with pytest
         run: |
           python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
diff --git a/docs/source/image_dataset.mdx b/docs/source/image_dataset.mdx
index 8dd9ac2c1fa..43559cbd2d7 100644
--- a/docs/source/image_dataset.mdx
+++ b/docs/source/image_dataset.mdx
@@ -2,7 +2,7 @@
 
 There are two methods for creating and sharing an image dataset. This guide will show you how to:
 
-* Create an audio dataset from local files in python with [`Dataset.push_to_hub`]. This is an easy way that requires only a few steps in python.
+* Create an image dataset from local files in python with [`Dataset.push_to_hub`]. This is an easy way that requires only a few steps in python.
 
 * Create an image dataset with `ImageFolder` and some metadata. This is a no-code solution for quickly creating an image dataset with several thousand images.
 
diff --git a/setup.py b/setup.py
index 10e6f5cda08..3901221c39a 100644
--- a/setup.py
+++ b/setup.py
@@ -133,7 +133,7 @@
     # for data streaming via http
     "aiohttp",
     # To get datasets from the Datasets Hub on huggingface.co
-    "huggingface-hub>=0.22.0",
+    "huggingface-hub>=0.23.0",
     # Utilities from PyPA to e.g., compare versions
    "packaging",
     # To parse YAML metadata from dataset cards
@@ -235,7 +235,7 @@
 
 setup(
     name="datasets",
-    version="3.0.2.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="3.0.3.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     description="HuggingFace community-driven open-source library of datasets",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
diff --git a/src/datasets/__init__.py b/src/datasets/__init__.py
index 7bbb2cd7666..f72cab6c89f 100644
--- a/src/datasets/__init__.py
+++ b/src/datasets/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "3.0.2.dev0"
+__version__ = "3.0.3.dev0"
 
 from .arrow_dataset import Dataset
 from .arrow_reader import ReadInstruction
diff --git a/src/datasets/download/download_manager.py b/src/datasets/download/download_manager.py
index 6ccb4f9d1c9..a8dff37aeef 100644
--- a/src/datasets/download/download_manager.py
+++ b/src/datasets/download/download_manager.py
@@ -189,7 +189,11 @@ def _download_batched(
             download_func = partial(self._download_single, download_config=download_config)
 
             fs: fsspec.AbstractFileSystem
-            fs, path = url_to_fs(url_or_filenames[0], **download_config.storage_options)
+            path = str(url_or_filenames[0])
+            if is_relative_path(path):
+                # append the relative path to the base_path
+                path = url_or_path_join(self._base_path, path)
+            fs, path = url_to_fs(path, **download_config.storage_options)
             size = 0
             try:
                 size = fs.info(path).get("size", 0)
diff --git a/src/datasets/load.py b/src/datasets/load.py
index 458b917c4f5..0faf2fd5cb5 100644
--- a/src/datasets/load.py
+++ b/src/datasets/load.py
@@ -36,10 +36,18 @@
 import requests
 import yaml
 from fsspec.core import url_to_fs
-from huggingface_hub import DatasetCard, DatasetCardData, HfApi, HfFileSystem
-from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError, RevisionNotFoundError, get_session
+from huggingface_hub import DatasetCard, DatasetCardData, HfApi
+from huggingface_hub.utils import (
+    EntryNotFoundError,
+    GatedRepoError,
+    LocalEntryNotFoundError,
+    OfflineModeIsEnabled,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+    get_session,
+)
 
-from . import config
+from . import __version__, config
 from .arrow_dataset import Dataset
 from .builder import BuilderConfig, DatasetBuilder
 from .data_files import (
@@ -73,7 +81,6 @@
 from .splits import Split
 from .utils import _dataset_viewer
 from .utils.file_utils import (
-    OfflineModeIsEnabled,
     _raise_if_offline_mode_is_enabled,
     cached_path,
     get_datasets_user_agent,
@@ -82,7 +89,7 @@
     relative_to_absolute_path,
     url_or_path_join,
 )
-from .utils.hub import check_auth, hf_dataset_url
+from .utils.hub import hf_dataset_url
 from .utils.info_utils import VerificationMode, is_small_dataset
 from .utils.logging import get_logger
 from .utils.metadata import MetadataConfigs
@@ -974,49 +981,48 @@ class HubDatasetModuleFactoryWithoutScript(_DatasetModuleFactory):
     def __init__(
         self,
         name: str,
-        revision: Optional[Union[str, Version]] = None,
+        commit_hash: str,
         data_dir: Optional[str] = None,
         data_files: Optional[Union[str, List, Dict]] = None,
         download_config: Optional[DownloadConfig] = None,
         download_mode: Optional[Union[DownloadMode, str]] = None,
+        use_exported_dataset_infos: bool = False,
     ):
         self.name = name
-        self.revision = revision
+        self.commit_hash = commit_hash
         self.data_files = data_files
         self.data_dir = data_dir
         self.download_config = download_config or DownloadConfig()
         self.download_mode = download_mode
+        self.use_exported_dataset_infos = use_exported_dataset_infos
         increase_load_count(name)
 
     def get_module(self) -> DatasetModule:
-        hfh_dataset_info = HfApi(config.HF_ENDPOINT).dataset_info(
-            self.name,
-            revision=self.revision,
+        # Get the Dataset Card and fix the revision in case there are new commits in the meantime
+        api = HfApi(
+            endpoint=config.HF_ENDPOINT,
             token=self.download_config.token,
-            timeout=100.0,
+            library_name="datasets",
+            library_version=__version__,
+            user_agent=get_datasets_user_agent(self.download_config.user_agent),
         )
-        # even if metadata_configs is not None (which means that we will resolve files for each config later)
-        # we cannot skip resolving all files because we need to infer module name by files extensions
-        revision = hfh_dataset_info.sha  # fix the revision in case there are new commits in the meantime
-        base_path = f"hf://datasets/{self.name}@{revision}/{self.data_dir or ''}".rstrip("/")
-
-        download_config = self.download_config.copy()
-        if download_config.download_desc is None:
-            download_config.download_desc = "Downloading readme"
         try:
-            dataset_readme_path = cached_path(
-                hf_dataset_url(self.name, config.REPOCARD_FILENAME, revision=revision),
-                download_config=download_config,
+            dataset_readme_path = api.hf_hub_download(
+                repo_id=self.name,
+                filename=config.REPOCARD_FILENAME,
+                repo_type="dataset",
+                revision=self.commit_hash,
+                proxies=self.download_config.proxies,
             )
-            dataset_card_data = DatasetCard.load(Path(dataset_readme_path)).data
-        except FileNotFoundError:
+            dataset_card_data = DatasetCard.load(dataset_readme_path).data
+        except EntryNotFoundError:
             dataset_card_data = DatasetCardData()
         download_config = self.download_config.copy()
         if download_config.download_desc is None:
             download_config.download_desc = "Downloading standalone yaml"
         try:
             standalone_yaml_path = cached_path(
-                hf_dataset_url(self.name, config.REPOYAML_FILENAME, revision=revision),
+                hf_dataset_url(self.name, config.REPOYAML_FILENAME, revision=self.commit_hash),
                 download_config=download_config,
             )
             with open(standalone_yaml_path, "r", encoding="utf-8") as f:
@@ -1027,17 +1033,13 @@ def get_module(self) -> DatasetModule:
                     dataset_card_data = DatasetCardData(**_dataset_card_data_dict)
         except FileNotFoundError:
             pass
+        base_path = f"hf://datasets/{self.name}@{self.commit_hash}/{self.data_dir or ''}".rstrip("/")
         metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card_data)
         dataset_infos = DatasetInfosDict.from_dataset_card_data(dataset_card_data)
-        # Use the infos from the parquet export except in some cases:
-        if self.data_dir or self.data_files or (self.revision and self.revision != "main"):
-            use_exported_dataset_infos = False
-        else:
-            use_exported_dataset_infos = True
-        if config.USE_PARQUET_EXPORT and use_exported_dataset_infos:
+        if config.USE_PARQUET_EXPORT and self.use_exported_dataset_infos:
             try:
                 exported_dataset_infos = _dataset_viewer.get_exported_dataset_infos(
-                    dataset=self.name, revision=self.revision, token=self.download_config.token
+                    dataset=self.name, commit_hash=self.commit_hash, token=self.download_config.token
                 )
                 exported_dataset_infos = DatasetInfosDict(
                     {
@@ -1110,7 +1112,7 @@ def get_module(self) -> DatasetModule:
             ]
             default_config_name = None
         builder_kwargs = {
-            "base_path": hf_dataset_url(self.name, "", revision=revision).rstrip("/"),
+            "base_path": hf_dataset_url(self.name, "", revision=self.commit_hash).rstrip("/"),
             "repo_id": self.name,
             "dataset_name": camelcase_to_snakecase(Path(self.name).name),
         }
@@ -1122,7 +1124,7 @@ def get_module(self) -> DatasetModule:
         try:
             # this file is deprecated and was created automatically in old versions of push_to_hub
             dataset_infos_path = cached_path(
-                hf_dataset_url(self.name, config.DATASETDICT_INFOS_FILENAME, revision=revision),
+                hf_dataset_url(self.name, config.DATASETDICT_INFOS_FILENAME, revision=self.commit_hash),
                 download_config=download_config,
             )
             with open(dataset_infos_path, encoding="utf-8") as f:
@@ -1143,10 +1145,9 @@ def get_module(self) -> DatasetModule:
         if default_config_name is None and len(dataset_infos) == 1:
             default_config_name = next(iter(dataset_infos))
 
-        hash = revision
         return DatasetModule(
             module_path,
-            hash,
+            self.commit_hash,
             builder_kwargs,
             dataset_infos=dataset_infos,
             builder_configs_parameters=BuilderConfigsParameters(
@@ -1165,20 +1166,20 @@ class HubDatasetModuleFactoryWithParquetExport(_DatasetModuleFactory):
     def __init__(
         self,
         name: str,
-        revision: Optional[str] = None,
+        commit_hash: str,
         download_config: Optional[DownloadConfig] = None,
     ):
         self.name = name
-        self.revision = revision
+        self.commit_hash = commit_hash
         self.download_config = download_config or DownloadConfig()
         increase_load_count(name)
 
     def get_module(self) -> DatasetModule:
         exported_parquet_files = _dataset_viewer.get_exported_parquet_files(
-            dataset=self.name, revision=self.revision, token=self.download_config.token
+            dataset=self.name, commit_hash=self.commit_hash, token=self.download_config.token
         )
         exported_dataset_infos = _dataset_viewer.get_exported_dataset_infos(
-            dataset=self.name, revision=self.revision, token=self.download_config.token
+            dataset=self.name, commit_hash=self.commit_hash, token=self.download_config.token
         )
         dataset_infos = DatasetInfosDict(
             {
@@ -1186,15 +1187,26 @@ def get_module(self) -> DatasetModule:
                 for config_name in exported_dataset_infos
             }
         )
-        hfh_dataset_info = HfApi(config.HF_ENDPOINT).dataset_info(
-            self.name,
-            revision="refs/convert/parquet",
-            token=self.download_config.token,
-            timeout=100.0,
-        )
-        revision = hfh_dataset_info.sha  # fix the revision in case there are new commits in the meantime
+        parquet_commit_hash = (
+            HfApi(
+                endpoint=config.HF_ENDPOINT,
+                token=self.download_config.token,
+                library_name="datasets",
+                library_version=__version__,
+                user_agent=get_datasets_user_agent(self.download_config.user_agent),
+            )
+            .dataset_info(
+                self.name,
+                revision="refs/convert/parquet",
+                token=self.download_config.token,
+                timeout=100.0,
+            )
+            .sha
+        )  # fix the revision in case there are new commits in the meantime
         metadata_configs = MetadataConfigs._from_exported_parquet_files_and_dataset_infos(
-            revision=revision, exported_parquet_files=exported_parquet_files, dataset_infos=dataset_infos
+            parquet_commit_hash=parquet_commit_hash,
+            exported_parquet_files=exported_parquet_files,
+            dataset_infos=dataset_infos,
         )
         module_path, _ = _PACKAGED_DATASETS_MODULES["parquet"]
         builder_configs, default_config_name = create_builder_configs_from_metadata_configs(
@@ -1203,7 +1215,6 @@ def get_module(self) -> DatasetModule:
             supports_metadata=False,
             download_config=self.download_config,
         )
-        hash = self.revision
         builder_kwargs = {
             "repo_id": self.name,
             "dataset_name": camelcase_to_snakecase(Path(self.name).name),
@@ -1211,7 +1222,7 @@ def get_module(self) -> DatasetModule:
 
         return DatasetModule(
             module_path,
-            hash,
+            self.commit_hash,
             builder_kwargs,
             dataset_infos=dataset_infos,
             builder_configs_parameters=BuilderConfigsParameters(
@@ -1231,14 +1242,14 @@ class HubDatasetModuleFactoryWithScript(_DatasetModuleFactory):
     def __init__(
         self,
         name: str,
-        revision: Optional[Union[str, Version]] = None,
+        commit_hash: str,
         download_config: Optional[DownloadConfig] = None,
         download_mode: Optional[Union[DownloadMode, str]] = None,
         dynamic_modules_path: Optional[str] = None,
         trust_remote_code: Optional[bool] = None,
     ):
         self.name = name
-        self.revision = revision
+        self.commit_hash = commit_hash
         self.download_config = download_config or DownloadConfig()
         self.download_mode = download_mode
         self.dynamic_modules_path = dynamic_modules_path
@@ -1246,14 +1257,14 @@ def __init__(
         increase_load_count(name)
 
     def download_loading_script(self) -> str:
-        file_path = hf_dataset_url(self.name, self.name.split("/")[-1] + ".py", revision=self.revision)
+        file_path = hf_dataset_url(self.name, self.name.split("/")[-1] + ".py", revision=self.commit_hash)
         download_config = self.download_config.copy()
         if download_config.download_desc is None:
             download_config.download_desc = "Downloading builder script"
         return cached_path(file_path, download_config=download_config)
 
     def download_dataset_infos_file(self) -> str:
-        dataset_infos = hf_dataset_url(self.name, config.DATASETDICT_INFOS_FILENAME, revision=self.revision)
+        dataset_infos = hf_dataset_url(self.name, config.DATASETDICT_INFOS_FILENAME, revision=self.commit_hash)
         # Download the dataset infos file if available
         download_config = self.download_config.copy()
         if download_config.download_desc is None:
@@ -1267,7 +1278,7 @@ def download_dataset_infos_file(self) -> str:
         return None
 
     def download_dataset_readme_file(self) -> str:
-        readme_url = hf_dataset_url(self.name, config.REPOCARD_FILENAME, revision=self.revision)
+        readme_url = hf_dataset_url(self.name, config.REPOCARD_FILENAME, revision=self.commit_hash)
         # Download the dataset infos file if available
         download_config = self.download_config.copy()
         if download_config.download_desc is None:
@@ -1296,7 +1307,7 @@ def get_module(self) -> DatasetModule:
         imports = get_imports(local_path)
         local_imports, library_imports = _download_additional_modules(
             name=self.name,
-            base_path=hf_dataset_url(self.name, "", revision=self.revision),
+            base_path=hf_dataset_url(self.name, "", revision=self.commit_hash),
             imports=imports,
             download_config=self.download_config,
         )
@@ -1343,7 +1354,7 @@ def get_module(self) -> DatasetModule:
         # make the new module to be noticed by the import system
         importlib.invalidate_caches()
         builder_kwargs = {
-            "base_path": hf_dataset_url(self.name, "", revision=self.revision).rstrip("/"),
+            "base_path": hf_dataset_url(self.name, "", revision=self.commit_hash).rstrip("/"),
             "repo_id": self.name,
         }
         return DatasetModule(module_path, hash, builder_kwargs, importable_file_path=importable_file_path)
@@ -1574,46 +1585,74 @@ def dataset_module_factory(
     # Try remotely
     elif is_relative_path(path) and path.count("/") <= 1:
         try:
-            _raise_if_offline_mode_is_enabled()
-            hf_api = HfApi(config.HF_ENDPOINT)
+            # Get the Dataset Card + get the revision + check authentication all at in one call
+            # We fix the commit_hash in case there are new commits in the meantime
+            api = HfApi(
+                endpoint=config.HF_ENDPOINT,
+                token=download_config.token,
+                library_name="datasets",
+                library_version=__version__,
+                user_agent=get_datasets_user_agent(download_config.user_agent),
+            )
             try:
-                dataset_info = hf_api.dataset_info(
+                _raise_if_offline_mode_is_enabled()
+                dataset_readme_path = api.hf_hub_download(
                     repo_id=path,
+                    filename=config.REPOCARD_FILENAME,
+                    repo_type="dataset",
                     revision=revision,
-                    token=download_config.token,
-                    timeout=100.0,
+                    proxies=download_config.proxies,
                 )
+                commit_hash = os.path.basename(os.path.dirname(dataset_readme_path))
+            except LocalEntryNotFoundError as e:
+                if isinstance(
+                    e.__cause__,
+                    (
+                        OfflineModeIsEnabled,
+                        requests.exceptions.ConnectTimeout,
+                        requests.exceptions.ConnectionError,
+                    ),
+                ):
+                    raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({e.__class__.__name__})") from e
+                else:
+                    raise
+            except EntryNotFoundError:
+                commit_hash = api.dataset_info(
+                    path,
+                    revision=revision,
+                    timeout=100.0,
+                ).sha
             except (
                 OfflineModeIsEnabled,
                 requests.exceptions.ConnectTimeout,
                 requests.exceptions.ConnectionError,
             ) as e:
                 raise ConnectionError(f"Couldn't reach '{path}' on the Hub ({e.__class__.__name__})") from e
+            except GatedRepoError as e:
+                message = f"Dataset '{path}' is a gated dataset on the Hub."
+                if e.response.status_code == 401:
+                    message += " You must be authenticated to access it."
+                elif e.response.status_code == 403:
+                    message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access."
+                raise DatasetNotFoundError(message) from e
             except RevisionNotFoundError as e:
                 raise DatasetNotFoundError(
                     f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub."
                 ) from e
             except RepositoryNotFoundError as e:
                 raise DatasetNotFoundError(f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed.") from e
-            if dataset_info.gated:
-                try:
-                    check_auth(hf_api, repo_id=path, token=download_config.token)
-                except GatedRepoError as e:
-                    message = f"Dataset '{path}' is a gated dataset on the Hub."
-                    if "401 Client Error" in str(e):
-                        message += " You must be authenticated to access it."
-                    elif "403 Client Error" in str(e):
-                        message += (
-                            f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access."
-                        )
-                    raise DatasetNotFoundError(message) from e
-
-            if filename in [sibling.rfilename for sibling in dataset_info.siblings]:  # contains a dataset script
-                fs = HfFileSystem(endpoint=config.HF_ENDPOINT, token=download_config.token)
+            try:
+                dataset_script_path = api.hf_hub_download(
+                    repo_id=path,
+                    filename=filename,
+                    repo_type="dataset",
+                    revision=commit_hash,
+                    proxies=download_config.proxies,
+                )
                 if _require_custom_configs or (revision and revision != "main"):
                     can_load_config_from_parquet_export = False
                 elif _require_default_config_name:
-                    with fs.open(f"datasets/{path}/{filename}", "r", encoding="utf-8") as f:
+                    with open(dataset_script_path, "r", encoding="utf-8") as f:
                         can_load_config_from_parquet_export = "DEFAULT_CONFIG_NAME" not in f.read()
                 else:
                     can_load_config_from_parquet_export = True
@@ -1622,29 +1661,48 @@ def dataset_module_factory(
                     # This fails when the dataset has multiple configs and a default config and
                     # the user didn't specify a configuration name (_require_default_config_name=True).
                     try:
-                        return HubDatasetModuleFactoryWithParquetExport(
-                            path, download_config=download_config, revision=dataset_info.sha
+                        out = HubDatasetModuleFactoryWithParquetExport(
+                            path, download_config=download_config, commit_hash=commit_hash
                         ).get_module()
+                        logger.info("Loading the dataset from the Parquet export on Hugging Face.")
+                        return out
                     except _dataset_viewer.DatasetViewerError:
                         pass
                 # Otherwise we must use the dataset script if the user trusts it
                 return HubDatasetModuleFactoryWithScript(
                     path,
-                    revision=revision,
+                    commit_hash=commit_hash,
                     download_config=download_config,
                     download_mode=download_mode,
                     dynamic_modules_path=dynamic_modules_path,
                     trust_remote_code=trust_remote_code,
                 ).get_module()
-            else:
+            except EntryNotFoundError:
+                # Use the infos from the parquet export except in some cases:
+                if data_dir or data_files or (revision and revision != "main"):
+                    use_exported_dataset_infos = False
+                else:
+                    use_exported_dataset_infos = True
                 return HubDatasetModuleFactoryWithoutScript(
                     path,
-                    revision=revision,
+                    commit_hash=commit_hash,
                     data_dir=data_dir,
                     data_files=data_files,
                     download_config=download_config,
                     download_mode=download_mode,
+                    use_exported_dataset_infos=use_exported_dataset_infos,
                 ).get_module()
+            except GatedRepoError as e:
+                message = f"Dataset '{path}' is a gated dataset on the Hub."
+                if e.response.status_code == 401:
+                    message += " You must be authenticated to access it."
+                elif e.response.status_code == 403:
+                    message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access."
+                raise DatasetNotFoundError(message) from e
+            except RevisionNotFoundError as e:
+                raise DatasetNotFoundError(
+                    f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub."
+                ) from e
         except Exception as e1:
             # All the attempts failed, before raising the error we should check if the module is already cached
             try:
diff --git a/src/datasets/utils/_dataset_viewer.py b/src/datasets/utils/_dataset_viewer.py
index b8cf6ea49e1..092741a956c 100644
--- a/src/datasets/utils/_dataset_viewer.py
+++ b/src/datasets/utils/_dataset_viewer.py
@@ -23,7 +23,9 @@ class DatasetViewerError(DatasetsError):
     """
 
 
-def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Union[str, bool]]) -> List[Dict[str, Any]]:
+def get_exported_parquet_files(
+    dataset: str, commit_hash: str, token: Optional[Union[str, bool]]
+) -> List[Dict[str, Any]]:
     """
     Get the dataset exported parquet files
     Docs: https://huggingface.co/docs/datasets-server/parquet
@@ -37,7 +39,7 @@ def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Unio
         )
         parquet_data_files_response.raise_for_status()
         if "X-Revision" in parquet_data_files_response.headers:
-            if parquet_data_files_response.headers["X-Revision"] == revision or revision is None:
+            if parquet_data_files_response.headers["X-Revision"] == commit_hash or commit_hash is None:
                 parquet_data_files_response_json = parquet_data_files_response.json()
                 if (
                     parquet_data_files_response_json.get("partial") is False
@@ -50,7 +52,7 @@ def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Unio
                 logger.debug(f"Parquet export for {dataset} is not completely ready yet.")
             else:
                 logger.debug(
-                    f"Parquet export for {dataset} is available but outdated (revision='{parquet_data_files_response.headers['X-Revision']}')"
+                    f"Parquet export for {dataset} is available but outdated (commit_hash='{parquet_data_files_response.headers['X-Revision']}')"
                 )
     except Exception as e:  # noqa catch any exception of the dataset viewer API and consider the parquet export doesn't exist
         logger.debug(f"No parquet export for {dataset} available ({type(e).__name__}: {e})")
@@ -58,7 +60,7 @@ def get_exported_parquet_files(dataset: str, revision: str, token: Optional[Unio
 
 
 def get_exported_dataset_infos(
-    dataset: str, revision: str, token: Optional[Union[str, bool]]
+    dataset: str, commit_hash: str, token: Optional[Union[str, bool]]
 ) -> Dict[str, Dict[str, Any]]:
     """
     Get the dataset information, can be useful to get e.g. the dataset features.
@@ -73,7 +75,7 @@ def get_exported_dataset_infos(
         )
         info_response.raise_for_status()
         if "X-Revision" in info_response.headers:
-            if info_response.headers["X-Revision"] == revision or revision is None:
+            if info_response.headers["X-Revision"] == commit_hash or commit_hash is None:
                 info_response = info_response.json()
                 if (
                     info_response.get("partial") is False
@@ -86,7 +88,7 @@ def get_exported_dataset_infos(
                 logger.debug(f"Dataset info for {dataset} is not completely ready yet.")
             else:
                 logger.debug(
-                    f"Dataset info for {dataset} is available but outdated (revision='{info_response.headers['X-Revision']}')"
+                    f"Dataset info for {dataset} is available but outdated (commit_hash='{info_response.headers['X-Revision']}')"
                 )
     except Exception as e:  # noqa catch any exception of the dataset viewer API and consider the dataset info doesn't exist
         logger.debug(f"No dataset info for {dataset} available ({type(e).__name__}: {e})")
diff --git a/src/datasets/utils/file_utils.py b/src/datasets/utils/file_utils.py
index 9bd2a1c3928..e44b1ce12bc 100644
--- a/src/datasets/utils/file_utils.py
+++ b/src/datasets/utils/file_utils.py
@@ -282,14 +282,10 @@ def get_authentication_headers_for_url(url: str, token: Optional[Union[str, bool
     return {}
 
 
-class OfflineModeIsEnabled(ConnectionError):
-    pass
-
-
 def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
     """Raise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_HUB_OFFLINE is True."""
     if config.HF_HUB_OFFLINE:
-        raise OfflineModeIsEnabled(
+        raise huggingface_hub.errors.OfflineModeIsEnabled(
             "Offline mode is enabled." if msg is None else "Offline mode is enabled. " + str(msg)
         )
 
diff --git a/src/datasets/utils/metadata.py b/src/datasets/utils/metadata.py
index fa463272213..21629407e4c 100644
--- a/src/datasets/utils/metadata.py
+++ b/src/datasets/utils/metadata.py
@@ -102,7 +102,7 @@ def _raise_if_data_files_field_not_valid(metadata_config: dict):
     @classmethod
     def _from_exported_parquet_files_and_dataset_infos(
         cls,
-        revision: str,
+        parquet_commit_hash: str,
         exported_parquet_files: List[Dict[str, Any]],
         dataset_infos: DatasetInfosDict,
     ) -> "MetadataConfigs":
@@ -112,7 +112,7 @@ def _from_exported_parquet_files_and_dataset_infos(
                     {
                         "split": split_name,
                         "path": [
-                            parquet_file["url"].replace("refs%2Fconvert%2Fparquet", revision)
+                            parquet_file["url"].replace("refs%2Fconvert%2Fparquet", parquet_commit_hash)
                             for parquet_file in parquet_files_for_split
                         ],
                     }
diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
index e82b5d11dc9..6f6ac01df9a 100644
--- a/tests/test_file_utils.py
+++ b/tests/test_file_utils.py
@@ -7,10 +7,10 @@
 import zstandard as zstd
 from fsspec.registry import _registry as _fsspec_registry
 from fsspec.spec import AbstractBufferedFile, AbstractFileSystem
+from huggingface_hub.errors import OfflineModeIsEnabled
 
 from datasets.download.download_config import DownloadConfig
 from datasets.utils.file_utils import (
-    OfflineModeIsEnabled,
     _get_extraction_protocol,
     _prepare_single_hop_path_and_storage_options,
     cached_path,
diff --git a/tests/test_load.py b/tests/test_load.py
index 3595f9735ec..5d551e8afbe 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -90,8 +90,13 @@ def _generate_examples(self, filepath, **kwargs):
 SAMPLE_DATASET_IDENTIFIER3 = "hf-internal-testing/multi_dir_dataset"  # has multiple data directories
 SAMPLE_DATASET_IDENTIFIER4 = "hf-internal-testing/imagefolder_with_metadata"  # imagefolder with a metadata file outside of the train/test directories
 SAMPLE_DATASET_IDENTIFIER5 = "hf-internal-testing/imagefolder_with_metadata_no_splits"  # imagefolder with a metadata file and no default split names in data files
-SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "hf-internal-testing/_dummy"
-SAMPLE_DATASET_NAME_THAT_DOESNT_EXIST = "_dummy"
+
+SAMPLE_DATASET_COMMIT_HASH = "0e1cee81e718feadf49560b287c4eb669c2efb1a"
+SAMPLE_DATASET_COMMIT_HASH2 = "c19550d35263090b1ec2bfefdbd737431fafec40"
+SAMPLE_DATASET_COMMIT_HASH3 = "aaa2d4bdd1d877d1c6178562cfc584bdfa90f6dc"
+SAMPLE_DATASET_COMMIT_HASH4 = "a7415617490f32e51c2f0ea20b5ce7cfba035a62"
+SAMPLE_DATASET_COMMIT_HASH5 = "4971fa562942cab8263f56a448c3f831b18f1c27"
+
 SAMPLE_DATASET_NO_CONFIGS_IN_METADATA = "hf-internal-testing/audiofolder_no_configs_in_metadata"
 SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA = "hf-internal-testing/audiofolder_single_config_in_metadata"
 SAMPLE_DATASET_TWO_CONFIG_IN_METADATA = "hf-internal-testing/audiofolder_two_configs_in_metadata"
@@ -100,6 +105,15 @@ def _generate_examples(self, filepath, **kwargs):
 )
 SAMPLE_DATASET_CAPITAL_LETTERS_IN_NAME = "hf-internal-testing/DatasetWithCapitalLetters"
 
+SAMPLE_DATASET_NO_CONFIGS_IN_METADATA_COMMIT_HASH = "26cd5079bb0d3cd1521c6894765a0b8edb159d7f"
+SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA_COMMIT_HASH = "1668dfc91efae975e44457cdabef60fb9200820a"
+SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_COMMIT_HASH = "e71bce498e6c2bd2c58b20b097fdd3389793263f"
+SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_WITH_DEFAULT_COMMIT_HASH = "38937109bb4dc7067f575fe6e7b420158eb9cf32"
+SAMPLE_DATASET_CAPITAL_LETTERS_IN_NAME_COMMIT_HASH = "70aa36264a6954920a13dd0465156a60b9f8af4b"
+
+SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "hf-internal-testing/_dummy"
+SAMPLE_DATASET_NAME_THAT_DOESNT_EXIST = "_dummy"
+
 
 @pytest.fixture
 def data_dir(tmp_path):
@@ -388,14 +402,16 @@ def setUp(self):
     def test_HubDatasetModuleFactoryWithScript_dont_trust_remote_code(self):
         factory = HubDatasetModuleFactoryWithScript(
-            "hf-internal-testing/dataset_with_script",
+            SAMPLE_DATASET_IDENTIFIER,
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH,
             download_config=self.download_config,
             dynamic_modules_path=self.dynamic_modules_path,
         )
         with patch.object(config, "HF_DATASETS_TRUST_REMOTE_CODE", None):  # this will be the default soon
             self.assertRaises(ValueError, factory.get_module)
         factory = HubDatasetModuleFactoryWithScript(
-            "hf-internal-testing/dataset_with_script",
+            SAMPLE_DATASET_IDENTIFIER,
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH,
             download_config=self.download_config,
             dynamic_modules_path=self.dynamic_modules_path,
             trust_remote_code=False,
         )
@@ -406,9 +422,9 @@ def test_HubDatasetModuleFactoryWithScript_with_hub_dataset(self):
         # "wmt_t2t" has additional imports (internal)
         factory = HubDatasetModuleFactoryWithScript(
             "wmt_t2t",
+            commit_hash="861aac88b2c6247dd93ade8b1c189ce714627750",
             download_config=self.download_config,
             dynamic_modules_path=self.dynamic_modules_path,
-            revision="861aac88b2c6247dd93ade8b1c189ce714627750",
             trust_remote_code=True,
         )
         module_factory_result = factory.get_module()
@@ -616,7 +632,7 @@ def test_PackagedDatasetModuleFactory_with_data_dir_and_metadata(self):
     @pytest.mark.integration
     def test_HubDatasetModuleFactoryWithoutScript(self):
         factory = HubDatasetModuleFactoryWithoutScript(
-            SAMPLE_DATASET_IDENTIFIER2, download_config=self.download_config
+            SAMPLE_DATASET_IDENTIFIER2, commit_hash=SAMPLE_DATASET_COMMIT_HASH2, download_config=self.download_config
         )
         module_factory_result = factory.get_module()
         assert importlib.import_module(module_factory_result.module_path) is not None
@@ -626,7 +642,10 @@ def test_HubDatasetModuleFactoryWithoutScript(self):
     def test_HubDatasetModuleFactoryWithoutScript_with_data_dir(self):
         data_dir = "data2"
         factory = HubDatasetModuleFactoryWithoutScript(
-            SAMPLE_DATASET_IDENTIFIER3, data_dir=data_dir, download_config=self.download_config
+            SAMPLE_DATASET_IDENTIFIER3,
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH3,
+            data_dir=data_dir,
+            download_config=self.download_config,
         )
         module_factory_result = factory.get_module()
         assert importlib.import_module(module_factory_result.module_path) is not None
@@ -645,7 +664,7 @@ def test_HubDatasetModuleFactoryWithoutScript_with_data_dir(self):
     @pytest.mark.integration
     def test_HubDatasetModuleFactoryWithoutScript_with_metadata(self):
         factory = HubDatasetModuleFactoryWithoutScript(
-            SAMPLE_DATASET_IDENTIFIER4, download_config=self.download_config
+            SAMPLE_DATASET_IDENTIFIER4, commit_hash=SAMPLE_DATASET_COMMIT_HASH4, download_config=self.download_config
         )
         module_factory_result = factory.get_module()
         assert importlib.import_module(module_factory_result.module_path) is not None
@@ -660,7 +679,7 @@ def test_HubDatasetModuleFactoryWithoutScript_with_metadata(self):
         assert any(Path(data_file).name == "metadata.jsonl" for data_file in builder_config.data_files["test"])
 
         factory = HubDatasetModuleFactoryWithoutScript(
-            SAMPLE_DATASET_IDENTIFIER5, download_config=self.download_config
+            SAMPLE_DATASET_IDENTIFIER5, commit_hash=SAMPLE_DATASET_COMMIT_HASH5, download_config=self.download_config
         )
         module_factory_result = factory.get_module()
         assert importlib.import_module(module_factory_result.module_path) is not None
@@ -677,6 +696,7 @@ def test_HubDatasetModuleFactoryWithoutScript_with_metadata(self):
     def test_HubDatasetModuleFactoryWithoutScript_with_one_default_config_in_metadata(self):
         factory = HubDatasetModuleFactoryWithoutScript(
             SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA,
+            commit_hash=SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA_COMMIT_HASH,
             download_config=self.download_config,
         )
         module_factory_result = factory.get_module()
@@ -714,9 +734,17 @@ def test_HubDatasetModuleFactoryWithoutScript_with_one_default_config_in_metadat
 
     @pytest.mark.integration
     def test_HubDatasetModuleFactoryWithoutScript_with_two_configs_in_metadata(self):
-        datasets_names = [SAMPLE_DATASET_TWO_CONFIG_IN_METADATA, SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_WITH_DEFAULT]
-        for dataset_name in datasets_names:
-            factory = HubDatasetModuleFactoryWithoutScript(dataset_name, download_config=self.download_config)
+        datasets_names = [
+            (SAMPLE_DATASET_TWO_CONFIG_IN_METADATA, SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_COMMIT_HASH),
+            (
+                SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_WITH_DEFAULT,
+                SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_WITH_DEFAULT_COMMIT_HASH,
+            ),
+        ]
+        for dataset_name, commit_hash in datasets_names:
+            factory = HubDatasetModuleFactoryWithoutScript(
+                dataset_name, commit_hash=commit_hash, download_config=self.download_config
+            )
             module_factory_result = factory.get_module()
             assert importlib.import_module(module_factory_result.module_path) is not None
 
@@ -767,6 +795,7 @@ def test_HubDatasetModuleFactoryWithoutScript_with_two_configs_in_metadata(self)
     def test_HubDatasetModuleFactoryWithScript(self):
         factory = HubDatasetModuleFactoryWithScript(
             SAMPLE_DATASET_IDENTIFIER,
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH,
             download_config=self.download_config,
             dynamic_modules_path=self.dynamic_modules_path,
             trust_remote_code=True,
         )
@@ -779,6 +808,7 @@ def test_HubDatasetModuleFactoryWithScript(self):
     def test_HubDatasetModuleFactoryWithParquetExport(self):
         factory = HubDatasetModuleFactoryWithParquetExport(
             SAMPLE_DATASET_IDENTIFIER,
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH,
             download_config=self.download_config,
         )
         module_factory_result = factory.get_module()
@@ -802,13 +832,13 @@ def test_HubDatasetModuleFactoryWithParquetExport_errors_on_wrong_sha(self):
         factory = HubDatasetModuleFactoryWithParquetExport(
             SAMPLE_DATASET_IDENTIFIER,
             download_config=self.download_config,
-            revision="0e1cee81e718feadf49560b287c4eb669c2efb1a",
+            commit_hash=SAMPLE_DATASET_COMMIT_HASH,
         )
         factory.get_module()
         factory = HubDatasetModuleFactoryWithParquetExport(
             SAMPLE_DATASET_IDENTIFIER,
             download_config=self.download_config,
-            revision="wrong_sha",
+            commit_hash="wrong_sha",
         )
         with self.assertRaises(_dataset_viewer.DatasetViewerError):
             factory.get_module()
@@ -846,19 +876,22 @@ def test_CachedDatasetModuleFactory_with_script(self):
 
 
 @pytest.mark.parametrize(
-    "factory_class",
+    "factory_class,requires_commit_hash",
     [
-        CachedDatasetModuleFactory,
-        HubDatasetModuleFactoryWithoutScript,
-        HubDatasetModuleFactoryWithScript,
-        LocalDatasetModuleFactoryWithoutScript,
-        LocalDatasetModuleFactoryWithScript,
-        PackagedDatasetModuleFactory,
+        (CachedDatasetModuleFactory, False),
+        (HubDatasetModuleFactoryWithoutScript, True),
+        (HubDatasetModuleFactoryWithScript, True),
+        (LocalDatasetModuleFactoryWithoutScript, False),
+        (LocalDatasetModuleFactoryWithScript, False),
+        (PackagedDatasetModuleFactory, False),
     ],
 )
-def test_module_factories(factory_class):
+def test_module_factories(factory_class, requires_commit_hash):
     name = "dummy_name"
-    factory = factory_class(name)
+    if requires_commit_hash:
+        factory = factory_class(name, commit_hash="foo")
+    else:
+        factory = factory_class(name)
     assert factory.name == name
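
For reference, a minimal usage sketch (not part of the patch) of one of the refactored factories, mirroring the updated tests above. It assumes network access to the Hub and reuses the `hf-internal-testing/dataset_with_script` fixture and its pinned commit hash from `tests/test_load.py`; module resolution is now pinned to an exact commit rather than a branch name.

from datasets.download.download_config import DownloadConfig
from datasets.load import HubDatasetModuleFactoryWithScript

# Resolve the dataset module at a pinned commit, as the updated tests do.
factory = HubDatasetModuleFactoryWithScript(
    "hf-internal-testing/dataset_with_script",  # SAMPLE_DATASET_IDENTIFIER in the tests
    commit_hash="0e1cee81e718feadf49560b287c4eb669c2efb1a",  # SAMPLE_DATASET_COMMIT_HASH in the tests
    download_config=DownloadConfig(),
    trust_remote_code=True,  # this fixture ships a loading script
)
module = factory.get_module()
print(module.hash)  # the DatasetModule hash is now the pinned commit hash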