chore: testing workflow
salman2013 committed Apr 18, 2024
1 parent 71d7349 commit ec021b3
Showing 1 changed file with 4 additions and 175 deletions.
179 changes: 4 additions & 175 deletions scripts/find_dependencies.py
@@ -69,28 +69,6 @@ def run_command(cmd: str, outfile=None) -> Tuple[bool, str]:

    return proc.returncode == 0, output.strip()
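# A hedged usage sketch (editorial illustration, not part of this commit):
# run_command returns a (success, stripped_output) tuple, so a caller would
# unpack it along these lines:
#
#     ok, output = run_command("git rev-parse HEAD")
#     if not ok:
#         print(f"Command failed: {output}")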


# @cached
# def npm_repo_url(npm_spec: str) -> Optional[str]:
#     """Given 'jspkg@0.1.0', return a repo url."""
#     pkg, _, ver = npm_spec.rpartition("@")
#     url = f"https://registry.npmjs.org/{pkg}/{ver}"
#     try:
#         resp = requests.get(url, timeout=60)
#         if resp.status_code != 200:
#             print(f"{npm_spec}: {url} -> {resp.status_code}")
#             return None
#         jdata = resp.json()
#     except requests.RequestException as exc:
#         print(f"Couldn't fetch npm data for {npm_spec}: {exc}")
#         return None
#     repo = jdata.get("repository")
#     if repo is None:
#         return None
#     if isinstance(repo, dict):
#         repo = repo["url"]
#     return repo

def canonical_url(url: str) -> str:
    """Canonicalize a repo URL, probably on GitHub."""
    for pat, repl in [
@@ -106,27 +84,6 @@ def canonical_url(url: str) -> str:
        url = f"https://github.com/{url}"
    return url
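# A behavioral sketch for canonical_url (most of the function is collapsed in
# this hunk, so both outcomes below are assumptions consistent with the
# visible fragment, not guaranteed by it):
#
#     canonical_url("git+https://github.com/openedx/edx-platform.git")
#     # -> "https://github.com/openedx/edx-platform"
#     canonical_url("openedx/edx-platform")
#     # -> "https://github.com/openedx/edx-platform"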

# @cached
# def find_real_url(url: str) -> Optional[str]:
#     """Find the eventual real url for a redirected url."""
#     while True:
#         try:
#             resp = requests.head(url, timeout=60, allow_redirects=True)
#         except requests.RequestException as exc:
#             print(f"Couldn't fetch {url}: {exc}")
#             return None
#         if resp.status_code == 429:
#             # I didn't know you could get 429 from https://github.com, but you can...
#             wait = int(resp.headers.get("Retry-After", 10))
#             time.sleep(wait + 1)
#         else:
#             break

#     if resp.status_code == 200:
#         return resp.url
#     return None


WORK_DIR = Path("/tmp/unpack_reqs")

def parallel_map(func, data, description):
@@ -140,106 +97,6 @@ def parallel_map(func, data, description):
            progress.update(pbar, advance=1)
            yield result
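# Consumption pattern for parallel_map, as seen in the commented-out callers
# further down: it is a generator, so results stream out as workers finish:
#
#     for url in parallel_map(npm_repo_url, deps, "Getting npm URLs"):
#         if url:
#             urls.add(canonical_url(url))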

# def write_list(path: str, lines: Iterable[str]):
#     """Write a list of strings to a file."""
#     with Path(path).open("w") as flist:
#         for line in lines:
#             print(line, file=flist)

# def check_js_dependencies() -> Iterable[str]:
#     """Check the JS dependencies in package-lock.json, returning a set of repo URLs."""
#     print("Checking JavaScript dependencies")
#     with Path("package-lock.json").open() as lockf:
#         lock_data = json.load(lockf)

#     deps = set()
#     for name, pkg in lock_data["packages"].items():
#         name = pkg.get("name") or name
#         name = name.rpartition("node_modules/")[-1]
#         version = pkg.get("version")
#         if version is None:
#             continue
#         deps.add(f"{name}@{version}")
#     write_list("deps.txt", sorted(deps))

#     urls = set()
#     for url in parallel_map(npm_repo_url, deps, "Getting npm URLs"):
#         if url:
#             urls.add(canonical_url(url))

#     real_urls = set()
#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
#         if url:
#             real_urls.add(url)

#     print(f"{len(deps)} deps, {len(urls)} urls, {len(real_urls)} real urls")
#     write_list("repo_urls.txt", sorted(real_urls))
#     return real_urls

# def check_py_dependencies() -> Iterable[str]:
#     """Check the Python dependencies in base.txt, returning a set of repo URLs."""
#     print("Checking Python dependencies")

#     print("Creating venv")
#     run_command("python3 -m venv .venv", "make_venv.log")
#     run_command(".venv/bin/python3 -m pip install -U pip", "pip_upgrade.log")
#     print("Downloading packages")
#     run_command(".venv/bin/python3 -m pip download --dest files -r base.txt", "pip_download.log")

#     urls = set()
#     for url in parallel_map(repo_url_from_wheel, Path("files").glob("*.whl"), "Examining wheels"):
#         if url:
#             urls.add(canonical_url(url))

#     for url in parallel_map(repo_url_from_tgz, Path("files").glob("*.tar.gz"), "Examining tar.gz"):
#         if url:
#             urls.add(canonical_url(url))

#     with open("base.txt") as fbase:
#         for line in fbase:
#             if match := re.search(r"https://github.com[^@ #]*(\.git)?", line):
#                 urls.add(canonical_url(match[0]))

#     real_urls = set()
#     for url in parallel_map(find_real_url, urls, "Getting real URLs"):
#         if url:
#             real_urls.add(url)

#     write_list("repo_urls.txt", sorted(real_urls))
#     return real_urls

# def matching_text(text, regexes):
#     """Find a line in text matching a regex, and return the first regex group."""
#     for regex in regexes:
#         for line in text.splitlines():
#             if match := re.search(regex, line):
#                 return match[1]
#     return None
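# How matching_text presumably pairs with the SOURCE_URL_REGEXES list below
# (an inference; the connecting helper is not shown in this diff):
#
#     url = matching_text(metadata, SOURCE_URL_REGEXES)
#     # -> the first captured group of the first regex that matches a line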

# @cached
# def repo_url_from_wheel(wheel_path: str) -> Optional[str]:
#     """Read metadata from a .whl file, returning the repo URL."""
#     with zipfile.ZipFile(wheel_path) as whl_file:
#         fmetadata = next((f for f in whl_file.namelist() if f.endswith("/METADATA")), None)
#         if fmetadata is None:
#             print(f"No metadata in {wheel_path}")
#             return None
#         with whl_file.open(fmetadata) as inner_file:
#             metadata = inner_file.read().decode("utf-8")
#     return repo_url_from_metadata(wheel_path, metadata)

# @cached
# def repo_url_from_tgz(tgz_path: str) -> Optional[str]:
#     """Read metadata from a .tar.gz file, returning the repo URL."""
#     with tarfile.open(tgz_path) as tgz_file:
#         fmetadata = next((f for f in tgz_file.getnames() if f.endswith("/PKG-INFO")), None)
#         if fmetadata is None:
#             print(f"No metadata in {tgz_path}")
#             return None
#         metadata = tgz_file.extractfile(fmetadata).read().decode("utf-8")
#     return repo_url_from_metadata(tgz_path, metadata)
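# Both helpers above end by calling repo_url_from_metadata, which this diff
# does not show. A minimal sketch of its assumed shape, built only from
# matching_text above and the SOURCE_URL_REGEXES list just below:
#
#     def repo_url_from_metadata(filename, metadata):
#         """Return the repo URL found in package metadata text, or None."""
#         repo_url = matching_text(metadata, SOURCE_URL_REGEXES)
#         if repo_url is None:
#             print(f"No repo URL in {filename}")
#         return repo_url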


SOURCE_URL_REGEXES = [
    # These regexes are tried in order. The first group is the extracted URL.
    r"(?i)^Project-URL: Source.*,\s*(.*)$",
@@ -339,19 +196,11 @@ def process_directory():
    Also copies the considered dependencies file into the temp work directory,
    for later analysis.
    """
    # repo_name = Path.cwd().name
    # repo_work = WORK_DIR / repo_name
    # repo_work.mkdir(parents=True, exist_ok=True)

    repo_urls = set()
    package_names = []
    openedx_packages = []
    # if (js_reqs := Path("package-lock.json")).exists():
    #     shutil.copyfile(js_reqs, repo_work / "package-lock.json")
    #     with change_dir(repo_work):
    #         repo_urls.update(check_js_dependencies())
    # if (py_reqs := find_py_reqs()):
    #     shutil.copyfile(py_reqs, repo_work / "base.txt")

    with open("/tmp/unpack_reqs/openedx/edx-platform/base.txt") as fbase:
        # Read the whole file; each line is a package requirement
        file_data = fbase.read()
@@ -391,40 +240,20 @@ def urls_in_orgs(urls, orgs):
        if any(f"/{org}/" in url for org in orgs)
    )
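# Usage sketch with hypothetical values: filter the collected repo URLs down
# to the organizations of interest.
#
#     urls_in_orgs(repo_urls, ["openedx", "edx"])
#     # -> sorted URLs containing "/openedx/" or "/edx/"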

# def urls_in_orgs(urls, org):
#     """
#     Find urls that are in any of the `orgs`.
#     """
#     return sorted(
#         url for url in urls
#         if f"/{org}/" in url
#     )


def main(dirs=None, org=None):
    """
    Analyze the requirements in all of the directories mentioned on the command line.
    If arguments have newlines, treat each line as a separate directory.
    """
    # if dirs is None:
    #     repo_dir = sys.argv[1]
    #     org_flag_index = sys.argv.index("--org")
    #     org = sys.argv[org_flag_index + 1]
    # print(f"Creating new work directory: {WORK_DIR}")
    # shutil.rmtree(WORK_DIR, ignore_errors=True)
    repo_urls = set()

    # with change_dir(repo_dir):
    repo_urls.update(process_directory())

    print("== DONE ==============")
    print("Second-party:")
    print("\n".join(repo_urls))
    # if repo_urls:
    #     sys.exit(1)
    if repo_urls:
        sys.exit(1)

if __name__ == "__main__":
    main()
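A possible invocation, for reference: with the argument parsing commented out
in main() and the base.txt path hard-coded in process_directory(), the script
is presumably run with no arguments:

    python scripts/find_dependencies.py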
