From 439e115d34a2d8737af719660c1b586ac32279dc Mon Sep 17 00:00:00 2001
From: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
Date: Thu, 31 Aug 2023 15:49:59 +0200
Subject: [PATCH] Add missing `revision` argument (#6191)

* add revision

* style

* add revision

* fix dataset info
---
 src/datasets/arrow_dataset.py | 12 ++++++------
 src/datasets/dataset_dict.py  |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
index 45a92263920..fb158986c55 100644
--- a/src/datasets/arrow_dataset.py
+++ b/src/datasets/arrow_dataset.py
@@ -5331,7 +5331,7 @@ def path_in_repo(_index, shard):
             ]
             download_config = DownloadConfig(token=token)
             deleted_size = sum(
-                xgetsize(hf_hub_url(repo_id, data_file), download_config=download_config)
+                xgetsize(hf_hub_url(repo_id, data_file, revision=branch), download_config=download_config)
                 for data_file in data_files_to_delete
             )
 
@@ -5446,15 +5446,15 @@ def push_to_hub(
             download_config.download_desc = "Downloading metadata"
             download_config.token = token
             dataset_readme_path = cached_path(
-                hf_hub_url(repo_id, "README.md"),
+                hf_hub_url(repo_id, "README.md", revision=branch),
                 download_config=download_config,
             )
             dataset_card = DatasetCard.load(Path(dataset_readme_path))
             dataset_card_data = dataset_card.data
             metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card_data)
             dataset_infos: DatasetInfosDict = DatasetInfosDict.from_dataset_card_data(dataset_card_data)
-            if dataset_infos:
-                repo_info = dataset_infos[next(iter(dataset_infos))]
+            if dataset_infos and config_name in dataset_infos:
+                repo_info = dataset_infos[config_name]
             else:
                 repo_info = None
         # get the deprecated dataset_infos.json to update them
@@ -5466,7 +5466,7 @@ def push_to_hub(
             download_config.download_desc = "Downloading metadata"
             download_config.token = token
             dataset_infos_path = cached_path(
-                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME),
+                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME, revision=branch),
                 download_config=download_config,
             )
             with open(dataset_infos_path, encoding="utf-8") as f:
@@ -5547,7 +5547,7 @@ def push_to_hub(
             download_config.download_desc = "Downloading deprecated dataset_infos.json"
             download_config.use_auth_token = token
             dataset_infos_path = cached_path(
-                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME),
+                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME, revision=branch),
                 download_config=download_config,
             )
             with open(dataset_infos_path, encoding="utf-8") as f:
diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py
index e54cbc9c291..ce6db377f30 100644
--- a/src/datasets/dataset_dict.py
+++ b/src/datasets/dataset_dict.py
@@ -1670,7 +1670,7 @@ def push_to_hub(
             download_config.download_desc = "Downloading metadata"
             download_config.token = token
             dataset_readme_path = cached_path(
-                hf_hub_url(repo_id, "README.md"),
+                hf_hub_url(repo_id, "README.md", revision=branch),
                 download_config=download_config,
             )
             dataset_card = DatasetCard.load(Path(dataset_readme_path))
@@ -1713,7 +1713,7 @@ def push_to_hub(
             download_config.download_desc = "Downloading metadata"
             download_config.token = token
             dataset_infos_path = cached_path(
-                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME),
+                hf_hub_url(repo_id, config.DATASETDICT_INFOS_FILENAME, revision=branch),
                 download_config=download_config,
             )
             with open(dataset_infos_path, encoding="utf-8") as f:
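
Not part of the patch above, but for context: a minimal usage sketch of the code path the change affects. It assumes a datasets version with the `branch` parameter shown in the diff, a Hub repo you can write to ("username/demo-dataset" is a placeholder), and an existing "dev" branch on that repo. With the fix, push_to_hub resolves the repo's existing README.md and deprecated dataset_infos.json from the target branch (hf_hub_url is called with revision=branch) rather than the default branch, and it only reuses previously stored split info when it matches the pushed config name.

# Illustrative sketch only; repo id and branch name are placeholders and
# must already exist on the Hub.
from datasets import Dataset

ds = Dataset.from_dict({"text": ["hello", "world"], "label": [0, 1]})

# Before this change, the metadata lookups during push ignored the target
# branch and read README.md / dataset_infos.json from the default branch;
# with revision=branch passed through, the "dev" branch's metadata is used.
ds.push_to_hub("username/demo-dataset", branch="dev")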