Skip to content

Commit

Permalink
duplicate: generalize to multiple repos per team
Browse files Browse the repository at this point in the history
  • Loading branch information
scivision committed Jun 19, 2024
1 parent d2ecd8f commit f829b74
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 56 deletions.
27 changes: 17 additions & 10 deletions Github/DuplicateGithubRepos.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,34 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
Duplicate repos specified in spreadsheet.
Requires GitHub Oauth login.
The Oauth file should be in a secure place, NOT in a Git repo!
Maybe encrypted and with permissions 600.
The Oauth key must have "repo" checked, or you'll get 404 error on user.create_repo().
Assumes you have an SSH key loaded for git push --mirror step
Assumes an SSH key loaded for
git push --mirror
Example:
python DuplicateGithubRepos.py book.xlsx ~/.ssh/oauth orgname prefix
"""

from pathlib import Path
from argparse import ArgumentParser

import pandas

import gitbulk.duplicator as gu
import gitbulk as gb


p = ArgumentParser(description="Duplicate repos specified in spreadsheet")
p.add_argument("fn", help="spreadsheet filename")
p.add_argument("oauth", help="Oauth filename")
p.add_argument("-u", "--username", help="username or organization to create duplicate under")
p.add_argument("-s", "--stem", help="beginning of duplicated repo names")
p.add_argument("-w", "--sheet", help="excel sheet to process", required=True)
p.add_argument("username", help="username or organization to create duplicate under")
p.add_argument("stem", help="beginning of duplicated repo names")
P = p.parse_args()

repos = gb.read_repos(P.fn, P.sheet)
fn = Path(P.fn).expanduser()
repos = pandas.read_excel(fn, index_col=0, header=0).squeeze()

gu.repo_dupe(repos, P.oauth, P.username, P.stem)
2 changes: 1 addition & 1 deletion Github/SetArchive.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from argparse import ArgumentParser

import gitbulk.base as gb
import gitbulk as gb


def main():
Expand Down
27 changes: 0 additions & 27 deletions src/gitbulk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from datetime import datetime
import logging
import typing as T
import pandas

import github

Expand All @@ -25,7 +24,6 @@
"session",
"get_repos",
"user_or_org",
"read_repos",
"get_collabs",
]

Expand Down Expand Up @@ -260,31 +258,6 @@ def user_or_org(g: github.Github, user: str) -> T.Any:
return g.get_user(user)


def read_repos(fn: Path, sheet: str) -> dict[str, str]:
    """
    Read the repos to duplicate from one sheet of an Excel spreadsheet.

    Spreadsheet column A supplies the dict keys (email/id, per the original
    description) and column D supplies the values (Git URL).
    Rows whose column D cell is empty are dropped.

    Parameters
    ----------
    fn : pathlib.Path
        path to Excel spreadsheet listing usernames and repos to duplicate.
        A leading "~" is expanded.
    sheet : str
        name of Excel sheet to use

    Returns
    -------
    repos : dict
        all the repos to duplicate, keyed by the column A value
    """

    # %% get list of repos to duplicate
    fn = Path(fn).expanduser()
    table = pandas.read_excel(fn, sheet_name=sheet, index_col=0, usecols="A, D")

    # Squeeze the column axis only: a bare squeeze() would also collapse a
    # sheet with a single data row to a scalar, which has no dropna()/to_dict().
    repos = table.squeeze("columns")

    return repos.dropna().to_dict()


def get_repos(userorg: github.NamedUser.NamedUser) -> T.Iterable[github.Repository.Repository]:
"""
get list of Repositories for a user or organization
Expand Down
45 changes: 28 additions & 17 deletions src/gitbulk/duplicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@
import webbrowser
import shutil
import functools
import math

from .base import connect, check_api_limit, last_commit_date, repo_exists
import pandas
import github

from . import connect, check_api_limit, last_commit_date, repo_exists


@functools.cache
Expand All @@ -22,43 +26,50 @@ def git_exe() -> str:
return git


def repo_dupe(repos: dict[str, str], oauth: Path, orgname: str | None = None, stem: str = ""):
def repo_dupe(repos: pandas.DataFrame, oauth: Path, orgname: str, stem: str):
"""
Duplicate GitHub repos AND their wikis
Parameters
----------
repos: dict of str, str
GitHub username, reponame to duplicate
repos: pandas.DataFrame
GitHub username, repo(s) to duplicate
oauth: pathlib.Path
GitHub Oauth token https://github.com/settings/tokens
orgname: str
create repos under Organization instead of username
stem: str
what to start new repo name with
"""
# %% authenticate

op, sess = connect(oauth, orgname)

username = op.login

# %% prepare to loop over repos
for email, oldurl in repos.items():
check_api_limit(sess)
for old_urls in repos.itertuples(index=False, name=None):
for old_url in old_urls:
if isinstance(old_url, float) and math.isnan(old_url):
continue

oldurl = old_url.replace("https://", "ssh://", 1)
oldname = "/".join(oldurl.split("/")[-2:]).split(".")[0]

oldurl = oldurl.replace("https", "ssh")
oldname = "/".join(oldurl.split("/")[-2:]).split(".")[0]
check_api_limit(sess)
try:
time = last_commit_date(sess, oldname)
except github.UnknownObjectException:
logging.error(f"{oldname} not found")
continue

oldtime = last_commit_date(sess, oldname)
if oldtime is None:
continue
if time is None:
continue

mirrorname = stem + email
mirrorname = stem

gitdupe(oldurl, oldtime, username, mirrorname, op)
gitdupe(oldurl, None, username, mirrorname, op, iswiki=True)
gitdupe(oldurl, time, username, mirrorname, op)
gitdupe(oldurl, None, username, mirrorname, op, iswiki=True)

sleep(0.1)
sleep(0.1)


def gitdupe(
Expand Down
2 changes: 1 addition & 1 deletion src/gitbulk/repo_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import github
import logging

from .base import check_api_limit, session, get_repos, user_or_org
from . import check_api_limit, session, get_repos, user_or_org


def repo_prober(
Expand Down

0 comments on commit f829b74

Please sign in to comment.