Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🧪 Do not mock create_commit #7076

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 3 additions & 72 deletions tests/test_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ def test_dataset_url(repo_id, filename, revision):
assert url == f"https://huggingface.co/datasets/{repo_id}/resolve/{revision or 'main'}/{quote(filename)}"


# Temporarily mark this test as expected to fail: GH-7073
@pytest.mark.xfail
def test_convert_to_parquet(temporary_repo, hf_api, hf_token, ci_hub_config, ci_hfh_hf_hub_url):
with temporary_repo() as repo_id:
hf_api.create_repo(repo_id, token=hf_token, repo_type="dataset")
Expand All @@ -57,81 +55,14 @@ def test_convert_to_parquet(temporary_repo, hf_api, hf_token, ci_hub_config, ci_
repo_id=repo_id,
repo_type="dataset",
)
commit_info = SimpleNamespace(
pr_revision="refs/pr/1", # "main", #
pr_url="https://hub-ci.huggingface.co/datasets/__DUMMY_USER__/__DUMMY_DATASET__/refs%2Fpr%2F1",
)
with patch.object(datasets.hub.HfApi, "create_commit", return_value=commit_info) as mock_create_commit:
with patch.object(datasets.hub.HfApi, "create_branch") as mock_create_branch:
with patch.object(datasets.hub.HfApi, "list_repo_tree", return_value=[]): # not needed
_ = convert_to_parquet(repo_id, token=hf_token, trust_remote_code=True)
with patch.object(datasets.hub.HfApi, "create_branch") as mock_create_branch:
with patch.object(datasets.hub.HfApi, "list_repo_tree", return_value=[]): # not needed
_ = convert_to_parquet(repo_id, token=hf_token, trust_remote_code=True)
# mock_create_branch
assert mock_create_branch.called
assert mock_create_branch.call_count == 2
for call_args, expected_branch in zip(mock_create_branch.call_args_list, ["refs/pr/1", "script"]):
assert call_args.kwargs.get("branch") == expected_branch
# mock_create_commit
assert mock_create_commit.called
assert mock_create_commit.call_count == 2
expected_readmes = [
dedent(f"""\
---
dataset_info:
config_name: first
features:
- name: text
dtype: string
splits:
- name: train
num_bytes: 55
num_examples: 5
download_size: 790
dataset_size: 55
{METADATA_CONFIGS_FIELD}:
- config_name: first
data_files:
- split: train
path: first/train-*
default: true
---
"""),
dedent(f"""\
---
dataset_info:
config_name: second
features:
- name: text
dtype: string
splits:
- name: train
num_bytes: 60
num_examples: 5
download_size: 798
dataset_size: 60
{METADATA_CONFIGS_FIELD}:
- config_name: second
data_files:
- split: train
path: second/train-*
---
"""),
]
for call_args, expected_commit_message, expected_create_pr, expected_readme, expected_parquet_path_in_repo in zip(
mock_create_commit.call_args_list,
["Convert dataset to Parquet", "Add 'second' config data files"],
[True, False],
expected_readmes,
["first/train-00000-of-00001.parquet", "second/train-00000-of-00001.parquet"],
):
assert call_args.kwargs.get("commit_message") == expected_commit_message
assert call_args.kwargs.get("create_pr") is expected_create_pr
operations = call_args.kwargs.get("operations")
assert len(operations) == 2
for operation in operations:
if operation.path_in_repo == "README.md":
assert operation.path_or_fileobj.decode() == expected_readme
else:
assert operation.path_in_repo == expected_parquet_path_in_repo


def test_delete_from_hub(temporary_repo, hf_api, hf_token, csv_path, ci_hub_config, ci_hfh_hf_hub_url) -> None:
Expand Down
Loading