From f829b7402d55543704d76da9c6a28c8ca086e7ff Mon Sep 17 00:00:00 2001
From: scivision <scivision@users.noreply.github.com>
Date: Wed, 19 Jun 2024 18:42:38 -0400
Subject: [PATCH] duplicate: generalize to multiple repos per team

---
 Github/DuplicateGithubRepos.py | 27 ++++++++++++--------
 Github/SetArchive.py           |  2 +-
 src/gitbulk/__init__.py        | 27 --------------------
 src/gitbulk/duplicator.py      | 45 +++++++++++++++++++++-------------
 src/gitbulk/repo_stats.py      |  2 +-
 5 files changed, 47 insertions(+), 56 deletions(-)

diff --git a/Github/DuplicateGithubRepos.py b/Github/DuplicateGithubRepos.py
index af83a45..386341a 100644
--- a/Github/DuplicateGithubRepos.py
+++ b/Github/DuplicateGithubRepos.py
@@ -1,27 +1,34 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 """
 Duplicate repos specified in spreadsheet.
-Requires GitHub Oauth login.
 
-The Oauth file should be in a secure place, NOT in a Git repo!
-Maybe encrypted and with permissions 600.
 The Oauth key must have "repo" checked, or you'll get 404 error on user.create_repo().
 
-Assumes you have an SSH key loaded for git push --mirror step
+Assumes an SSH key is loaded for
+
+    git push --mirror
+
+Example:
+
+    python DuplicateGithubRepos.py book.xlsx ~/.ssh/oauth orgname prefix
 """
+
+from pathlib import Path
 from argparse import ArgumentParser
+
+import pandas
+
 import gitbulk.duplicator as gu
-import gitbulk as gb
 
 
 p = ArgumentParser(description="Duplicate repos specified in spreadsheet")
 p.add_argument("fn", help="spreadsheet filename")
 p.add_argument("oauth", help="Oauth filename")
-p.add_argument("-u", "--username", help="username or organization to create duplicate under")
-p.add_argument("-s", "--stem", help="beginning of duplicated repo names")
-p.add_argument("-w", "--sheet", help="excel sheet to process", required=True)
+p.add_argument("username", help="username or organization to create duplicate under")
+p.add_argument("stem", help="beginning of duplicated repo names")
 P = p.parse_args()
 
-repos = gb.read_repos(P.fn, P.sheet)
+fn = Path(P.fn).expanduser()
+repos = pandas.read_excel(fn, index_col=0, header=0)  # keep DataFrame: repo_dupe uses .itertuples()
 
 gu.repo_dupe(repos, P.oauth, P.username, P.stem)
diff --git a/Github/SetArchive.py b/Github/SetArchive.py
index efce6f7..73c844e 100644
--- a/Github/SetArchive.py
+++ b/Github/SetArchive.py
@@ -12,7 +12,7 @@
 
 from argparse import ArgumentParser
 
-import gitbulk.base as gb
+import gitbulk as gb
 
 
 def main():
diff --git a/src/gitbulk/__init__.py b/src/gitbulk/__init__.py
index a2d0b6b..93cac44 100644
--- a/src/gitbulk/__init__.py
+++ b/src/gitbulk/__init__.py
@@ -9,7 +9,6 @@
 from datetime import datetime
 import logging
 import typing as T
-import pandas
 
 import github
 
@@ -25,7 +24,6 @@
     "session",
     "get_repos",
     "user_or_org",
-    "read_repos",
     "get_collabs",
 ]
 
@@ -260,31 +258,6 @@ def user_or_org(g: github.Github, user: str) -> T.Any:
         return g.get_user(user)
 
 
-def read_repos(fn: Path, sheet: str) -> dict[str, str]:
-    """
-    make pandas.Series of email/id, Git url from spreadsheet
-
-    Parameters
-    ----------
-    fn : pathlib.Path
-        path to Excel spreadsheet listing usernames and repos to duplicate
-    sheet : str
-        name of Excel sheet to use
-
-    Results
-    -------
-    repos : dict
-        all the repos to duplicate
-    """
-
-    # %% get list of repos to duplicate
-    fn = Path(fn).expanduser()
-    repos = pandas.read_excel(fn, sheet_name=sheet, index_col=0, usecols="A, D").squeeze()
-    repos.dropna(how="any", inplace=True)
-
-    return repos.to_dict()
-
-
 def get_repos(userorg: github.NamedUser.NamedUser) -> T.Iterable[github.Repository.Repository]:
     """
     get list of Repositories for a user or organization
diff --git a/src/gitbulk/duplicator.py b/src/gitbulk/duplicator.py
index 4df009b..cdade5f 100644
--- a/src/gitbulk/duplicator.py
+++ b/src/gitbulk/duplicator.py
@@ -7,8 +7,12 @@
 import webbrowser
 import shutil
 import functools
+import math
 
-from .base import connect, check_api_limit, last_commit_date, repo_exists
+import pandas
+import github
+
+from . import connect, check_api_limit, last_commit_date, repo_exists
 
 
 @functools.cache
@@ -22,14 +26,14 @@ def git_exe() -> str:
     return git
 
 
-def repo_dupe(repos: dict[str, str], oauth: Path, orgname: str | None = None, stem: str = ""):
+def repo_dupe(repos: pandas.DataFrame, oauth: Path, orgname: str, stem: str):
     """
     Duplicate GitHub repos AND their wikis
 
     Parameters
     ----------
-    repos: dict of str, str
-        GitHub username, reponame to duplicate
+    repos: pandas.DataFrame
+        one row per GitHub user/team; each non-empty cell is a repo URL to duplicate
     oauth: pathlib.Path
         GitHub Oauth token  https://github.com/settings/tokens
     orgname: str
@@ -37,28 +41,35 @@ def repo_dupe(repos: dict[str, str], oauth: Path, orgname: str | None = None, st
     stem: str
         what to start new repo name with
     """
-    # %% authenticate
+
     op, sess = connect(oauth, orgname)
 
     username = op.login
 
-    # %% prepare to loop over repos
-    for email, oldurl in repos.items():
-        check_api_limit(sess)
+    for old_urls in repos.itertuples(index=False, name=None):
+        for old_url in old_urls:
+            if isinstance(old_url, float) and math.isnan(old_url):
+                continue
+
+            oldurl = old_url.replace("https://", "ssh://", 1)
+            oldname = "/".join(oldurl.split("/")[-2:]).removesuffix(".git")  # owner/repo, dots kept
 
-        oldurl = oldurl.replace("https", "ssh")
-        oldname = "/".join(oldurl.split("/")[-2:]).split(".")[0]
+            check_api_limit(sess)
+            try:
+                time = last_commit_date(sess, oldname)
+            except github.UnknownObjectException:
+                logging.error(f"{oldname} not found")
+                continue
 
-        oldtime = last_commit_date(sess, oldname)
-        if oldtime is None:
-            continue
+            if time is None:
+                continue
 
-        mirrorname = stem + email
+            mirrorname = stem + oldname.replace("/", "-")  # unique mirror name per source repo
 
-        gitdupe(oldurl, oldtime, username, mirrorname, op)
-        gitdupe(oldurl, None, username, mirrorname, op, iswiki=True)
+            gitdupe(oldurl, time, username, mirrorname, op)
+            gitdupe(oldurl, None, username, mirrorname, op, iswiki=True)
 
-        sleep(0.1)
+            sleep(0.1)
 
 
 def gitdupe(
diff --git a/src/gitbulk/repo_stats.py b/src/gitbulk/repo_stats.py
index 24b120f..20bc391 100644
--- a/src/gitbulk/repo_stats.py
+++ b/src/gitbulk/repo_stats.py
@@ -7,7 +7,7 @@
 import github
 import logging
 
-from .base import check_api_limit, session, get_repos, user_or_org
+from . import check_api_limit, session, get_repos, user_or_org
 
 
 def repo_prober(