From f829b7402d55543704d76da9c6a28c8ca086e7ff Mon Sep 17 00:00:00 2001 From: scivision Date: Wed, 19 Jun 2024 18:42:38 -0400 Subject: [PATCH] duplicate: generalize to multiple repos per team --- Github/DuplicateGithubRepos.py | 27 ++++++++++++-------- Github/SetArchive.py | 2 +- src/gitbulk/__init__.py | 27 -------------------- src/gitbulk/duplicator.py | 45 +++++++++++++++++++++------------- src/gitbulk/repo_stats.py | 2 +- 5 files changed, 47 insertions(+), 56 deletions(-) diff --git a/Github/DuplicateGithubRepos.py b/Github/DuplicateGithubRepos.py index af83a45..386341a 100644 --- a/Github/DuplicateGithubRepos.py +++ b/Github/DuplicateGithubRepos.py @@ -1,27 +1,34 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Duplicate repos specified in spreadsheet. -Requires GitHub Oauth login. -The Oauth file should be in a secure place, NOT in a Git repo! -Maybe encrypted and with permissions 600. The Oauth key must have "repo" checked, or you'll get 404 error on user.create_repo(). -Assumes you have an SSH key loaded for git push --mirror step +Assumes an SSH key loaded for + + git push --mirror + +Example: + + python DuplicateGithubRepos.py book.xlsx ~/.ssh/oauth orgname prefix """ + +from pathlib import Path from argparse import ArgumentParser + +import pandas + import gitbulk.duplicator as gu -import gitbulk as gb p = ArgumentParser(description="Duplicate repos specified in spreadsheet") p.add_argument("fn", help="spreadsheet filename") p.add_argument("oauth", help="Oauth filename") -p.add_argument("-u", "--username", help="username or organization to create duplicate under") -p.add_argument("-s", "--stem", help="beginning of duplicated repo names") -p.add_argument("-w", "--sheet", help="excel sheet to process", required=True) +p.add_argument("username", help="username or organization to create duplicate under") +p.add_argument("stem", help="beginning of duplicated repo names") P = p.parse_args() -repos = gb.read_repos(P.fn, P.sheet) +fn = Path(P.fn).expanduser() +repos = pandas.read_excel(fn, index_col=0, header=0).squeeze() gu.repo_dupe(repos, P.oauth, P.username, P.stem) diff --git a/Github/SetArchive.py b/Github/SetArchive.py index efce6f7..73c844e 100644 --- a/Github/SetArchive.py +++ b/Github/SetArchive.py @@ -12,7 +12,7 @@ from argparse import ArgumentParser -import gitbulk.base as gb +import gitbulk as gb def main(): diff --git a/src/gitbulk/__init__.py b/src/gitbulk/__init__.py index a2d0b6b..93cac44 100644 --- a/src/gitbulk/__init__.py +++ b/src/gitbulk/__init__.py @@ -9,7 +9,6 @@ from datetime import datetime import logging import typing as T -import pandas import github @@ -25,7 +24,6 @@ "session", "get_repos", "user_or_org", - "read_repos", "get_collabs", ] @@ -260,31 +258,6 @@ def user_or_org(g: github.Github, user: str) -> T.Any: return g.get_user(user) -def read_repos(fn: Path, sheet: str) -> dict[str, str]: - """ - make pandas.Series of email/id, Git url from spreadsheet - - Parameters - ---------- - fn : pathlib.Path - path to Excel spreadsheet listing usernames and repos to duplicate - sheet : str - name of Excel sheet to use - - Results - ------- - repos : dict - all the repos to duplicate - """ - - # %% get list of repos to duplicate - fn = Path(fn).expanduser() - repos = pandas.read_excel(fn, sheet_name=sheet, index_col=0, usecols="A, D").squeeze() - repos.dropna(how="any", inplace=True) - - return repos.to_dict() - - def get_repos(userorg: github.NamedUser.NamedUser) -> T.Iterable[github.Repository.Repository]: """ get list of Repositories for a user or organization diff --git a/src/gitbulk/duplicator.py b/src/gitbulk/duplicator.py index 4df009b..cdade5f 100644 --- a/src/gitbulk/duplicator.py +++ b/src/gitbulk/duplicator.py @@ -7,8 +7,12 @@ import webbrowser import shutil import functools +import math -from .base import connect, check_api_limit, last_commit_date, repo_exists +import pandas +import github + +from . import connect, check_api_limit, last_commit_date, repo_exists @functools.cache @@ -22,14 +26,14 @@ def git_exe() -> str: return git -def repo_dupe(repos: dict[str, str], oauth: Path, orgname: str | None = None, stem: str = ""): +def repo_dupe(repos: pandas.DataFrame, oauth: Path, orgname: str, stem: str): """ Duplicate GitHub repos AND their wikis Parameters ---------- - repos: dict of str, str - GitHub username, reponame to duplicate + repos: pandas.DataFrame + GitHub username, repo(s) to duplicate oauth: pathlib.Path GitHub Oauth token https://github.com/settings/tokens orgname: str @@ -37,28 +41,35 @@ def repo_dupe(repos: dict[str, str], oauth: Path, orgname: str | None = None, st stem: str what to start new repo name with """ - # %% authenticate + op, sess = connect(oauth, orgname) username = op.login - # %% prepare to loop over repos - for email, oldurl in repos.items(): - check_api_limit(sess) + for old_urls in repos.itertuples(index=False, name=None): + for old_url in old_urls: + if isinstance(old_url, float) and math.isnan(old_url): + continue + + oldurl = old_url.replace("https://", "ssh://", 1) + oldname = "/".join(oldurl.split("/")[-2:]).split(".")[0] - oldurl = oldurl.replace("https", "ssh") - oldname = "/".join(oldurl.split("/")[-2:]).split(".")[0] + check_api_limit(sess) + try: + time = last_commit_date(sess, oldname) + except github.UnknownObjectException: + logging.error(f"{oldname} not found") + continue - oldtime = last_commit_date(sess, oldname) - if oldtime is None: - continue + if time is None: + continue - mirrorname = stem + email + mirrorname = stem - gitdupe(oldurl, oldtime, username, mirrorname, op) - gitdupe(oldurl, None, username, mirrorname, op, iswiki=True) + gitdupe(oldurl, time, username, mirrorname, op) + gitdupe(oldurl, None, username, mirrorname, op, iswiki=True) - sleep(0.1) + sleep(0.1) def gitdupe( diff --git a/src/gitbulk/repo_stats.py b/src/gitbulk/repo_stats.py index 24b120f..20bc391 100644 --- a/src/gitbulk/repo_stats.py +++ b/src/gitbulk/repo_stats.py @@ -7,7 +7,7 @@ import github import logging -from .base import check_api_limit, session, get_repos, user_or_org +from . import check_api_limit, session, get_repos, user_or_org def repo_prober(