-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: annex interfaces and initialization
`Repo` and `Worktree` received dedicated, optional support for annexes, and their initialization. The concept is substantially different from that implemented in legacy DataLad. There, an `AnnexRepo` class was derived from a `GitRepo` class and extended and overwrote individual methods, forming a relatively high-level API. Here, any `Repo` or `Worktree` can have an optional annex. All operations related to that annex are implemented in dedicated handlers that are fully independent of a `Repo` or `Worktree` instance. The aim is to reduce the complex interdependencies that cripple the validity and robustness of the legacy implementations. Git is used more "directly", even in git-annex repositories, and git-annex is likewise used more directly, agnostic of the context and demands of high-level operations. This has important consequences. For example, a method like the legacy `AnnexRepo.save()` cannot exist, because there is no `GitRepo.save` that it can override and "do the right thing" for a git-annex repository. This is acceptable here, because the API provided in this module is not targeting the level of compound/convenience operations like `save()` that aim to alleviate a developer's required expertise. Instead, the aim here is to provide primitives that can be used in a higher-level (possibly function-based) API.
- Loading branch information
Showing
4 changed files
with
334 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
|
||
from datalad_core.runners import ( | ||
CommandError, | ||
call_annex_json_lines, | ||
call_git_lines, | ||
) | ||
|
||
|
||
class AnnexBase: | ||
""" """ | ||
|
||
def __init__(self, path: Path): | ||
self._path = path | ||
self._uuid = None | ||
|
||
@property | ||
def path(self) -> Path: | ||
"""Path used by git-annex commands as PWD""" | ||
return self._path | ||
|
||
# not using a cached_property, because it would not prevent write-access | ||
@property | ||
def uuid(self) -> str: | ||
if self._uuid is None: | ||
for ax in self.list_annexes(): | ||
if ax['here'] is True: | ||
self._uuid = ax['uuid'] | ||
return self._uuid | ||
if self._uuid is None: # pragma: no cover | ||
msg = 'annex unexpectedly has no UUID' | ||
raise RuntimeError(msg) | ||
return self._uuid | ||
|
||
def list_annexes(self) -> list[dict[str, str | bool]]: | ||
res = list(call_annex_json_lines(['info', '--fast'], cwd=self.path)) | ||
if len(res) != 1: # pragma: no cover | ||
msg = 'unexpected output from git-annex-info' | ||
raise RuntimeError(msg) | ||
info = res[0] | ||
annexes: list[dict[str, str | bool]] = [] | ||
for rt in ( | ||
'untrusted repositories', | ||
'semitrusted repositories', | ||
'trusted repositories', | ||
): | ||
for r in info[rt]: | ||
# TODO: make ENUM? | ||
r.update(trust=rt.split(' ')[0]) | ||
annexes.append(r) | ||
return annexes | ||
|
||
|
||
class BareRepoAnnex(AnnexBase):
    # ATTN: This class should not get additional methods. Instead, they should
    # all go into AnnexBase, if they can work with bare and non-bare
    # repositories. The purpose of this class is solely to enforce use with
    # a bare repository in its __init__()
    """Annex handler that can only be created for a bare Git repository."""

    def __init__(self, path: Path):
        """
        The given ``path`` must point to a bare Git repository and is used
        to resolve and confirm the presence of an annex.

        A ``ValueError`` is raised when Git does not report a bare
        repository for ``path``, or when the resolved annex location does
        not exist. Errors raised by the Git call itself are not caught here.
        """
        # one rev-parse call reports both the bare flag and the annex location
        rev_parse_cmd = [
            '-C',
            str(path),
            'rev-parse',
            '--path-format=absolute',
            '--is-bare-repository',
            '--git-path',
            'annex',
        ]
        is_bare, annex_dir = call_git_lines(rev_parse_cmd)
        if is_bare != 'true':
            msg = f'not a bare repository at {path}'
            raise ValueError(msg)
        # this simple test is also what is done in legacy AnnexRepo
        annex_path = Path(annex_dir)
        if not annex_path.exists():
            msg = f'no repository annex found at {annex_path}'
            raise ValueError(msg)
        # git-annex commands run in the directory that contains the annex
        super().__init__(annex_path.parent)
|
||
|
||
class Annex(AnnexBase):
    """Annex handler that is created from a path inside a Git worktree."""

    def __init__(self, path: Path):
        """
        The given ``path`` must point to a Git repository worktree
        and is used to resolve and confirm the presence of an annex.

        A ``ValueError`` is raised when Git cannot resolve the annex and
        worktree locations for ``path``, or when the resolved annex location
        does not exist.
        """
        # one rev-parse call reports the annex location and the worktree root
        rev_parse_cmd = [
            '-C',
            str(path),
            'rev-parse',
            '--path-format=absolute',
            '--git-path',
            'annex',
            '--show-toplevel',
        ]
        try:
            annex_dir, worktree_root = call_git_lines(rev_parse_cmd)
        except CommandError as e:
            msg = f'cannot resolve paths for a worktree with an annex at {path}'
            raise ValueError(msg) from e
        # this simple test is also what is done in legacy AnnexRepo
        annex_path = Path(annex_dir)
        if not annex_path.exists():
            msg = f'no repository annex found at {annex_path}'
            raise ValueError(msg)
        # git-annex commands run in the worktree root reported by Git
        super().__init__(Path(worktree_root))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import pytest | ||
|
||
from datalad_core.runners import ( | ||
CommandError, | ||
) | ||
|
||
from ..annex import ( | ||
Annex, | ||
BareRepoAnnex, | ||
) | ||
from ..repo import Repo | ||
from ..worktree import Worktree | ||
|
||
|
||
def test_barerepoannex_error(baregitrepo):
    """``BareRepoAnnex`` reports a missing annex for the ``baregitrepo`` fixture."""
    expect_no_annex = pytest.raises(ValueError, match='no repository annex found')
    with expect_no_annex:
        BareRepoAnnex(baregitrepo)
|
||
|
||
def test_barerepoannex_nonbare(annexrepo):
    """Bare-repository annex interfaces refuse the ``annexrepo`` fixture."""
    # direct construction is rejected
    with pytest.raises(ValueError, match='not a bare repository'):
        BareRepoAnnex(annexrepo)

    # initialization through the repository of the worktree is rejected too
    worktree = Worktree(annexrepo)
    with pytest.raises(TypeError, match='Cannot initialize annex in a non-bare repo'):
        worktree.repo.init_annex()

    # and no bare annex is reported afterwards
    assert worktree.repo.bare_annex is None
|
||
|
||
# # check annex location resolution. it should be fairly robust and | ||
# # pretty much always find the annex, as long as the path points | ||
# # anywhere inside a git repo | ||
# annex0 = BareRepoAnnex(annexrepo) | ||
# annex1 = BareRepoAnnex(annexrepo / '.git') | ||
# annex2 = BareRepoAnnex(annexrepo / '.git' / 'annex') | ||
# testdir = annexrepo / 'somedir' | ||
# testdir.mkdir() | ||
# annex3 = BareRepoAnnex(testdir) | ||
# assert annex0.path == annex1.path == annex2.path == annex3.path | ||
|
||
|
||
def test_annex_error(annexrepo):
    """``Annex`` rejects the ``git_dir`` of a worktree as its location."""
    expect_unresolved = pytest.raises(ValueError, match='cannot resolve path')
    with expect_unresolved:
        Annex(Worktree(annexrepo).git_dir)
|
||
|
||
def test_annex_noannex(gitrepo):
    """Without an annex, ``Annex`` raises and ``Worktree.annex`` is ``None``."""
    with pytest.raises(ValueError, match='no repository annex found'):
        Annex(gitrepo)

    worktree = Worktree(gitrepo)
    assert worktree.annex is None
    # and it keeps it that way on repeated trials
    assert worktree.annex is None
|
||
|
||
def test_annex(annexrepo):
    """``Annex`` resolves to the same path from the root and a subdirectory."""
    from_root = Annex(annexrepo)
    # initialization is robust to "anywhere in repo"
    subdir = annexrepo / 'somedir'
    subdir.mkdir()
    from_subdir = Annex(subdir)
    assert from_root.path == from_subdir.path
    assert from_subdir.path == annexrepo
    assert from_root.uuid
|
||
|
||
def test_repo_init_annex_error(baregitrepo):
    """``Repo.init_annex()`` fails when a file occupies the annex location."""
    bare_repo = Repo(baregitrepo)
    # we take the place of the annex
    blocker = bare_repo.path / 'annex'
    blocker.touch()
    with pytest.raises(CommandError, match='fileExist'):
        bare_repo.init_annex()
|
||
|
||
def test_worktree_init_annex(gitrepo):
    """``Worktree.init_annex()`` returns the object exposed as ``Worktree.annex``."""
    worktree = Worktree(gitrepo)
    created = worktree.init_annex()
    assert worktree.annex is created
    assert worktree.annex.uuid
|
||
|
||
def test_repo_init_annex(baregitrepo):
    """``Repo.init_annex()`` returns the object exposed as ``Repo.bare_annex``."""
    bare_repo = Repo(baregitrepo)
    assert bare_repo.bare_annex is None

    # setting the flag has no effect here, it just exercises the
    # code path
    created = bare_repo.init_annex('testannex', autoenable_remotes=False)
    assert created is bare_repo.bare_annex
    assert bare_repo.bare_annex.uuid
    # ask again to exercise cached code path
    assert bare_repo.bare_annex.uuid
|
||
|
||
def test_relocate_repo_w_annex(tmp_path):
    """Annex listing is unchanged after re-initializing with another ``gitdir``."""
    worktree_dir = tmp_path / 'wt'
    first_gitdir = tmp_path / 'orig_repo'
    second_gitdir = tmp_path / 'new_repo'

    worktree_dir.mkdir()

    original = Worktree.init_at(worktree_dir, gitdir=first_gitdir)
    original.init_annex()
    assert original.repo.path == first_gitdir
    # annex commands run in the context of the worktree, not the repo
    assert original.annex.path == original.path
    listed_before = original.annex.list_annexes()
    assert len(listed_before) > 1

    relocated = Worktree.init_at(worktree_dir, gitdir=second_gitdir)
    assert relocated.repo.path == second_gitdir
    assert (second_gitdir / 'annex').is_dir()
    assert relocated.annex.path == original.path
    # running annex commands continues to work after relocation
    assert listed_before == relocated.annex.list_annexes()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters