Skip to content

Commit

Permalink
Fix and improve docstrings, migrate endpoints to enums.
Browse files Browse the repository at this point in the history
  - Endpoints are now an Enum specified in http_get_models.
  • Loading branch information
terjekv committed Sep 16, 2023
1 parent 708a790 commit 5ae174b
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 56 deletions.
20 changes: 20 additions & 0 deletions cvmfsscraper/http_get_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Pydantic models for CVMFS HTTP responses."""
import re
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, field_validator, model_validator
Expand Down Expand Up @@ -279,3 +280,22 @@ def validate_hex(cls, value: str):
raise ValueError(f"{value} is not a valid hex string")

return value


class Endpoints(Enum):
    """Endpoint mapping.

    We map endpoints to a tuple of (path, model_class) where path is the
    path to the endpoint, and model_class is the Pydantic model that
    corresponds to the response.

    Each member's value is that two-element tuple; Enum passes its elements
    to ``__init__``, so they are also available as the ``path`` and
    ``model_class`` attributes of the member.

    Paths may contain ``str.format`` placeholders (``{repo}`` and
    ``{geoapi_str}``) that must be filled in before the path is used to
    build a URL.
    """

    # No placeholders: this path does not depend on a repository name.
    REPOSITORIES_JSON = ("info/v1/repositories.json", GetCVMFSRepositoriesJSON)
    # The remaining paths are templates keyed on the repository name.
    CVMFS_STATUS_JSON = ("{repo}/.cvmfs_status.json", GetCVMFSStatusJSON)
    # {geoapi_str} is an additional placeholder used only by this endpoint.
    GEOAPI = ("{repo}/api/v1.0/geo/x/{geoapi_str}", GetGeoAPI)
    CVMFS_PUBLISHED = ("{repo}/.cvmfspublished", GetCVMFSPublished)

    def __init__(self, path: str, model_class: type[BaseModel]):
        """Initialize the endpoint.

        :param path: The endpoint path template (first element of the
            member's tuple value).
        :param model_class: The Pydantic model class used to represent the
            endpoint's response (second element of the member's tuple value).
        """
        self.path = path
        self.model_class = model_class
12 changes: 8 additions & 4 deletions cvmfsscraper/repository.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
"""A CVMFS repository."""
from typing import Dict

from cvmfsscraper.http_get_models import GetCVMFSPublished, GetCVMFSStatusJSON
from cvmfsscraper.http_get_models import (
Endpoints,
GetCVMFSPublished,
GetCVMFSStatusJSON,
)
from cvmfsscraper.tools import warn


Expand Down Expand Up @@ -130,14 +134,14 @@ def fetch_cvmfspublished(self) -> GetCVMFSPublished:
:returns: A GetCVMFSPublished object.
"""
return self.server.fetch_endpoint(".cvmfspublished", self.name)
return self.server.fetch_endpoint(Endpoints.CVMFS_PUBLISHED, self.name)

def fetch_repository(self) -> GetCVMFSStatusJSON:
"""Fetch a repository by name.
raises: urllib.error.URLError (or a subclass thereof) for URL errors.
pydantic.ValidationError if the object creation fails.
:returns: A RepositoryOrReplica object.
:returns: GetCVMFSStatusJSON object.
"""
return self.server.fetch_endpoint(".cvmfs_status.json", self.name)
return self.server.fetch_endpoint(Endpoints.CVMFS_STATUS_JSON, self.name)
92 changes: 42 additions & 50 deletions cvmfsscraper/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
from urllib import error, request

from cvmfsscraper.constants import GeoAPIStatus
from cvmfsscraper.exceptions import CVMFSFetchError
from cvmfsscraper.http_get_models import (
CVMFSBaseModel,
Endpoints,
GetCVMFSPublished,
GetCVMFSRepositoriesJSON,
GetCVMFSStatusJSON,
Expand Down Expand Up @@ -197,7 +196,7 @@ def fetch_repositories_json(self) -> GetCVMFSRepositoriesJSON:
returns: A GetCVMFSRepositoriesJSON object.
"""
return self.fetch_endpoint("repositories.json")
return self.fetch_endpoint(Endpoints.REPOSITORIES_JSON)

def fetch_geoapi(self, repo: Repository) -> GetGeoAPI:
"""Fetch the GeoAPI host ordering.
Expand All @@ -207,73 +206,66 @@ def fetch_geoapi(self, repo: Repository) -> GetGeoAPI:
:returns: A GetGeoAPI object.
"""
return self.fetch_endpoint(
"geoapi", repo=repo.name, geoapi_servers=GEOAPI_SERVERS
)
return self.fetch_endpoint(Endpoints.GEOAPI, repo=repo.name)

def fetch_endpoint(
self,
endpoint: str,
endpoint: Endpoints,
repo: str = "data",
geoapi_servers: str = GEOAPI_SERVERS,
raw: bool = False,
) -> CVMFSBaseModel:
"""Read and return the content of a specified file.
This function reads the content of a specified file from a
test data directory and returns it. Depending on the file, it returns
either bytes or a string.
:param server: The server where the data resides.
:param file: The name of the file to read.
:param repo: The repository where the data resides. Default is "data".
:raises: FileNotFoundError: if the data file is not found.
:returns: Content of the file as either bytes or string.
) -> Union[
GetCVMFSPublished, GetCVMFSRepositoriesJSON, GetCVMFSStatusJSON, GetGeoAPI
]:
"""Fetch and process a specified URL endpoint.
This function reads the content of a specified URL and either returns a validated
CVMFS pydantic model representing the data from the endpoint, or throws an
exception.
Note: We are deducing the content type from the URL itself. This is due to cvmfs
files always returning application/x-cvmfs no matter their content.
:param endpoint: The endpoint to fetch, as an Endpoints enum value.
:param repo: The repository used for the endpoint, if relevant. Required for
all but Endpoints.REPOSITORIES_JSON. Defaults to "data".
:param geoapi_servers: Specify the list of DNS names of geoapi servers to use for
the geoapi endpoint. Defaults to GEOAPI_SERVERS.
:raises: PydanticValidationError: If the object creation fails.
CVMFSFetchError: If the endpoint is unknown.
urllib.error.URLError (or a subclass thereof): If the URL fetch fails.
TypeError: If the endpoint is not an Endpoints enum value.
:returns: An endpoint-specific pydantic model, one of:
GetCVMFSPublished (Endpoints.CVMFS_PUBLISHED)
GetCVMFSRepositoriesJSON (Endpoints.REPOSITORIES_JSON)
GetCVMFSStatusJSON (Endpoints.CVMFS_STATUS_JSON)
GetGeoAPI (Endpoints.GEOAPI)
"""
# We do this validation in case someone passes a string instead of an enum value
if not isinstance(endpoint, Endpoints): # type: ignore
raise TypeError("endpoint must be an Endpoints enum value")

geoapi_str = ",".join(geoapi_servers)
formatted_path = endpoint.path.format(repo=repo, geoapi_str=geoapi_str)
url = f"{self.url()}/cvmfs/{formatted_path}"

# Lookup table for filenames and their paths. Binary read flag is optional.
lookup: Dict[str, Dict[str, Union[str, CVMFSBaseModel]]] = {
"repositories.json": {
"path": "info/v1/repositories.json",
"class": GetCVMFSRepositoriesJSON,
},
".cvmfs_status.json": {
"path": f"{repo}/.cvmfs_status.json",
"class": GetCVMFSStatusJSON,
},
"geoapi": {
"path": f"{repo}/api/v1.0/geo/x/{geoapi_str}",
"class": GetGeoAPI,
},
".cvmfspublished": {
"path": f"{repo}/.cvmfspublished",
"class": GetCVMFSPublished,
},
}

if endpoint not in lookup:
raise CVMFSFetchError(f"Unknown endpoint: {endpoint}")

url = f"{self.url()}/cvmfs/{lookup[endpoint]['path']}"
timeout_seconds = 5
try:
content = request.urlopen(url, timeout=timeout_seconds)

if endpoint == "repositories.json" or endpoint == ".cvmfs_status.json":
if endpoint in [Endpoints.REPOSITORIES_JSON, Endpoints.CVMFS_STATUS_JSON]:
content = json.loads(content.read())
elif endpoint == ".cvmfspublished":
elif endpoint == Endpoints.CVMFS_PUBLISHED:
content = GetCVMFSPublished.parse_blob(content.read())
elif endpoint == "geoapi":
elif endpoint == Endpoints.GEOAPI:
indices = [int(x) for x in content.read().decode().split(",")]
content = {
"host_indices": indices,
"host_names_input": geoapi_servers,
}

return lookup[endpoint]["class"](**content)
return endpoint.model_class(**content)

except error.URLError as e:
warn(f"fetch_endpoint: {url}", e)
Expand Down
3 changes: 1 addition & 2 deletions cvmfsscraper/tests/test_010_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from cvmfsscraper import scrape
from cvmfsscraper.constants import GeoAPIStatus
from cvmfsscraper.exceptions import CVMFSFetchError
from cvmfsscraper.main import scrape as scrape_deprecated
from cvmfsscraper.main import scrape_server as scrape_server_deprecated
from cvmfsscraper.server import Stratum0Server, Stratum1Server
Expand Down Expand Up @@ -72,7 +71,7 @@ def test_fetching_unknown_endpoint(self):
"""Test that fetching an unknown endpoint raises the correct exception."""
stratum1 = Stratum1Server("stratum1-no.tld", [], [], scrape_on_init=False)

with self.assertRaises(CVMFSFetchError):
with self.assertRaises(TypeError):
stratum1.fetch_endpoint("unknown")


Expand Down

0 comments on commit 5ae174b

Please sign in to comment.