diff --git a/src/nrp_devtools/pypi_proxy/proxy.py b/src/nrp_devtools/pypi_proxy/proxy.py index a8d1c6c..ce1a78e 100644 --- a/src/nrp_devtools/pypi_proxy/proxy.py +++ b/src/nrp_devtools/pypi_proxy/proxy.py @@ -20,11 +20,7 @@ def welcome(): @app.route('/simple') def simple(): url = current_app.config["PYPI_SERVER_URL"] - content_html = requests.get(url).text - content_html = content_html.replace('
', '
') - parser = etree.HTMLParser() - html_root = etree.fromstring(content_html, parser) - atags = html_root.findall(".//a") + atags, html_root = read_url_links(url) pypi_packages = set() for x in atags: package_name = x.attrib["href"].strip('/').split('/')[-1] @@ -33,6 +29,17 @@ def simple(): current_app.pypi_packages = pypi_packages return etree.tostring(html_root, encoding=str), 200, {"Content-Type": "text/html"} + +def read_url_links(url): + content_html = requests.get(url).text + # replace malformed
tags, without this they would get stripped out + content_html = content_html.replace('
', '
') + parser = etree.HTMLParser() + html_root = etree.fromstring(content_html, parser) + atags = html_root.findall(".//a") + return atags, html_root + + @app.route('/simple//') def package(package): if not hasattr(current_app, "pypi_packages"): @@ -41,11 +48,7 @@ def package(package): return "Package not found", 404, {"Content-Type": "text/plain"} url = f"{current_app.config['PYPI_SERVER_URL']}/{package}" - content_html = requests.get(url).text - content_html = content_html.replace('
', '
') - parser = etree.HTMLParser() - html_root = etree.fromstring(content_html, parser) - atags = html_root.findall(".//a") + atags, html_root = read_url_links(url) for x in atags: x.attrib["href"] = f"/simple/{package}/{x.attrib['href'].split('/')[-1]}" return etree.tostring(html_root, encoding=str), 200, {"Content-Type": "text/html"}