Skip to content

Commit

Permalink
Add proper logging support.
Browse files Browse the repository at this point in the history
Fixes #5.
  • Loading branch information
terjekv committed Jan 21, 2024
1 parent 23b4de8 commit 7bae80a
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 9 deletions.
32 changes: 32 additions & 0 deletions cvmfsscraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,42 @@
"""Core of the cvmfsscraper package."""

import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List

import structlog

from cvmfsscraper.server import CVMFSServer, Stratum0Server, Stratum1Server

# Package-wide structlog configuration. This runs as an import side effect of
# the package. Processors are applied in the order listed; the last one
# renders the event dict into the final output.
structlog.configure(
    processors=[
        # Pull in any context bound through structlog.contextvars.
        structlog.contextvars.merge_contextvars,
        # Record the level name in the event dict.
        structlog.processors.add_log_level,
        structlog.processors.StackInfoRenderer(),
        structlog.dev.set_exc_info,
        # Timestamps use local time (utc=False) with second resolution.
        structlog.processors.TimeStamper(fmt="%Y-%m-%d %H:%M:%S", utc=False),
        # Final step: serialise the event dict as a JSON string.
        structlog.processors.JSONRenderer(),
    ],
    context_class=dict,
    # Create stdlib ``logging`` loggers so output flows through the standard
    # logging machinery (handlers and levels configured by the consumer).
    logger_factory=structlog.stdlib.LoggerFactory(),
    cache_logger_on_first_use=True,
)


def set_log_level(level: int) -> None:
    """Set the log level for the library.

    This function allows the consumer of the library to set the desired log level.
    It may be called more than once; the most recent level wins.

    :param level: The log level to set. This should be a value from the logging module,
        such as logging.INFO, logging.DEBUG, etc.
    """
    logging.basicConfig(level=level, format="%(message)s")
    # basicConfig() does nothing when the root logger already has handlers
    # (a second call to this function, or an application that configured
    # logging first), so the requested level is applied explicitly as well.
    logging.getLogger().setLevel(level)
    structlog.configure(
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )


def scrape_server(
dns_name: str,
Expand Down
13 changes: 13 additions & 0 deletions cvmfsscraper/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

from typing import Any

import structlog

log = structlog.getLogger(__name__)


class CVMFSScraperBaseException(Exception):
"""Base exception for cvmfsscraper."""
Expand All @@ -12,6 +16,15 @@ def __init__(
"""Initialize the exception."""
self.message = message
self.original_exception = original_excption

log.debug(
"Exception raised",
exception=self.__class__.__name__,
message=message,
original_exception=original_excption,
args=args,
)

super().__init__(message, *args)


Expand Down
13 changes: 13 additions & 0 deletions cvmfsscraper/http_get_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
from enum import Enum
from typing import Any, Dict, List, Optional, Type, Union

import structlog
from pydantic import BaseModel, Field, field_validator, model_validator

from cvmfsscraper.exceptions import CVMFSValidationError

log = structlog.getLogger(__name__)


def hex_field(min_length: int, max_length: int, alias: str):
"""Create a Field for hexadecimal strings with a specified length range.
Expand All @@ -24,6 +27,16 @@ def hex_field(min_length: int, max_length: int, alias: str):
class CVMFSBaseModel(BaseModel):
    """Shared base for the CVMFS pydantic models; logs every instantiation."""

    def __init__(self, **kwargs: Any) -> None:
        """Log the model name and raw input at debug level, then initialize.

        :param kwargs: Field values, forwarded unchanged to the parent initializer.
        """
        model_name = type(self).__name__
        log.debug("Initializing pydantic model", model=model_name, kwargs=kwargs)

        super().__init__(**kwargs)


class RepositoryOrReplica(BaseModel):
"""Model for a repository or replica."""
Expand Down
22 changes: 22 additions & 0 deletions cvmfsscraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,31 @@

from typing import Any, Dict, List

import structlog

from cvmfsscraper import scrape as scrape_proper
from cvmfsscraper import scrape_server as scrape_server_proper
from cvmfsscraper.server import CVMFSServer
from cvmfsscraper.tools import deprecated

deplog = structlog.getLogger("deprecation")


def scrape(*args: Any, **kwargs: Any) -> List[CVMFSServer]:
    """Legacy API support for cvmfsscraper.

    Announces the deprecation (through ``deprecated`` and the ``deprecation``
    logger) and then delegates to the package-level ``scrape``.

    :param args: Positional arguments, forwarded unchanged.
    :param kwargs: Keyword arguments, forwarded unchanged.
    :returns: The result of the package-level ``scrape``.
    """
    # NOTE(review): the dotted names below say "cvmfsserver" although the
    # package imported in this module is "cvmfsscraper" -- confirm whether the
    # reported names should be corrected (here and in scrape_server).
    deprecated(
        "cvmfsserver.main.scrape",
        "cvmfsserver.scrape",
    )
    deplog.warning(
        "Deprecated API used",
        deprecated="cvmfsserver.main.scrape",
        replacement="cvmfsserver.scrape",
        message=(
            # Trailing space keeps the two sentences from running together
            # when the adjacent literals are concatenated.
            "cvmfsserver.main.scrape is deprecated and will be removed in a future release. "
            "Please use cvmfsserver.scrape instead."
        ),
    )
    return scrape_proper(*args, **kwargs)


Expand All @@ -23,4 +36,13 @@ def scrape_server(*args: Any, **kwargs: Dict[str, Any]) -> CVMFSServer:
"cvmfsserver.main.scrape_server",
"cvmfsserver.scrape_server",
)
deplog.warning(
"Deprecated API used",
deprecated="cvmfsserver.main.scrape_server",
replacement="cvmfsserver.scrape_server",
message=(
"cvmfsserver.main.scrape_server is deprecated and will be removed in a future release."
"Please use cvmfsserver.scrape_server instead."
),
)
return scrape_server_proper(*args, **kwargs)
26 changes: 23 additions & 3 deletions cvmfsscraper/repository.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""A CVMFS repository."""
from typing import Dict

import structlog

from cvmfsscraper.http_get_models import (
Endpoints,
GetCVMFSPublished,
GetCVMFSStatusJSON,
)
from cvmfsscraper.tools import warn

log = structlog.getLogger(__name__)


class Repository:
Expand Down Expand Up @@ -57,6 +60,8 @@ def __init__(self, server: object, name: str, url: str):

self.fetch_errors = []

log.debug("Initalizing repository", server=server.name, name=name, url=url)

self.scrape()

def __str__(self) -> str:
Expand All @@ -65,18 +70,33 @@ def __str__(self) -> str:

def scrape(self) -> None:
    """Scrape the repository.

    Runs two independent steps: fetching and parsing ``.cvmfspublished``,
    then fetching and parsing the repository status JSON. A failure in either
    step is logged as a warning and recorded in ``self.fetch_errors``; it
    does not prevent the other step from running and is not re-raised.
    """
    # Log the server's name rather than the object itself so the rendered
    # record stays readable; fall back to the object if it has no ``name``.
    server_name = getattr(self.server, "name", self.server)

    log.debug(
        "Scraping repository", server=server_name, name=self.name, url=self.path
    )

    try:
        cvmfspublished = self.fetch_cvmfspublished()
        self.parse_cvmfspublished(cvmfspublished)
    except Exception as exc:
        # Best effort by design: record the failure and carry on.
        log.warning(
            "Scrape error",
            exc=exc,
            server=server_name,
            name=self.name,
            url=self.path,
        )
        self.fetch_errors.append({"path": self.path, "error": exc})

    try:
        repo = self.fetch_repository()
        self.parse_status_json(repo)
    except Exception as exc:
        log.warning(
            "Scrape error",
            exc=exc,
            server=server_name,
            name=self.name,
            url=self.path,
        )
        self.fetch_errors.append({"path": self.path, "error": exc})

def attribute_mapping(self) -> Dict[str, str]:
Expand Down
61 changes: 57 additions & 4 deletions cvmfsscraper/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from typing import Dict, List
from urllib import error, request

import structlog

from cvmfsscraper.constants import GeoAPIStatus
from cvmfsscraper.http_get_models import (
EndpointClassesType,
Expand All @@ -14,7 +16,9 @@
RepositoryOrReplica,
)
from cvmfsscraper.repository import Repository
from cvmfsscraper.tools import GEOAPI_SERVERS, warn
from cvmfsscraper.tools import GEOAPI_SERVERS

log = structlog.getLogger(__name__)


class CVMFSServer:
Expand Down Expand Up @@ -60,6 +64,14 @@ def __init__(

self.fetch_errors = []

log.info(
"Initializing server",
server=server,
repos=repos,
ignore_repos=ignore_repos,
scrape_on_init=scrape_on_init,
)

if scrape_on_init:
self.scrape()

Expand All @@ -73,6 +85,8 @@ def url(self) -> str:

def scrape(self) -> None:
"""Scrape the server."""
log.info("Scraping server", server=self.name)

self.populate_repositories()

if not self.fetch_errors:
Expand Down Expand Up @@ -106,6 +120,7 @@ def populate_repositories(self) -> None:
If the server is down, the list will be empty.
"""
log.info("Populating repositories", server=self.name)
try:
repodata = self.fetch_repositories_json()

Expand All @@ -118,7 +133,11 @@ def populate_repositories(self) -> None:

self._is_down = False
except Exception as e: # pragma: no cover
warn(f"Populate repository: {self.name}", e)
log.error(
"Populate repository failure",
exc=e,
server=self.name,
)
self.fetch_errors.append({"path": self.name, "error": e})

def process_repositories_json(
Expand Down Expand Up @@ -185,7 +204,11 @@ def check_geoapi_status(self) -> GeoAPIStatus:
else:
return GeoAPIStatus.LOCATION_ERROR
except Exception as e: # pragma: no cover
warn("GEOAPI failure", e)
log.error(
"GeoAPI failure",
exc=e,
name=self.name,
)
return GeoAPIStatus.NO_RESPONSE

def fetch_repositories_json(self) -> GetCVMFSRepositoriesJSON:
Expand Down Expand Up @@ -244,19 +267,42 @@ def fetch_endpoint(
if not isinstance(endpoint, Endpoints): # type: ignore
raise TypeError("endpoint must be an Endpoints enum value")

log.debug(
"Fetching endpoint", server=self.name, endpoint=endpoint.name, repo=repo
)

geoapi_str = ",".join(geoapi_servers)
formatted_path = endpoint.path.format(repo=repo, geoapi_str=geoapi_str)
url = f"{self.url()}/cvmfs/{formatted_path}"

timeout_seconds = 5
try:
log.info("Fetching url", url=url)
content = request.urlopen(url, timeout=timeout_seconds)

if endpoint in [Endpoints.REPOSITORIES_JSON, Endpoints.CVMFS_STATUS_JSON]:
log.debug(
"Fetched JSON endpoint",
server=self.name,
endpoint=endpoint.name,
repo=repo,
)
content = json.loads(content.read())
elif endpoint == Endpoints.CVMFS_PUBLISHED:
log.debug(
"Fetched .cvmfspublished",
server=self.name,
endpoint=endpoint.name,
repo=repo,
)
content = GetCVMFSPublished.parse_blob(content.read())
elif endpoint == Endpoints.GEOAPI:
log.debug(
"Fetched geoapi",
server=self.name,
endpoint=endpoint.name,
repo=repo,
)
indices = [int(x) for x in content.read().decode().split(",")]
content = {
"host_indices": indices,
Expand All @@ -266,7 +312,14 @@ def fetch_endpoint(
return endpoint.model_class(**content)

except error.URLError as e:
warn(f"fetch_endpoint: {url}", e)
log.error(
"Fetch endpoint failure",
exc=e,
name=self.name,
endpoint=endpoint.name,
repo=repo,
url=url,
)
raise e from e


Expand Down
8 changes: 7 additions & 1 deletion cvmfsscraper/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
import urllib.request
from typing import Any

import structlog

log = structlog.getLogger(__name__)

GEOAPI_SERVERS = [
"cvmfs-s1fnal.opensciencegrid.org",
"cvmfs-stratum-one.cern.ch",
Expand All @@ -25,6 +29,8 @@ def deprecated(old: str, new: str) -> None:

def fetch_absolute(obj: object, url: str) -> str:
"""Fetch an absolute URL, handle exceptions."""
log.info("Fetching", url=url)

timeout_seconds = 5
try:
content = urllib.request.urlopen(url, timeout=timeout_seconds).read()
Expand All @@ -34,7 +40,7 @@ def fetch_absolute(obj: object, url: str) -> str:

return content
except Exception as e:
warn(f"fetch_absolute: {url}", e)
log.warn("Fetch absolute", url=url, exception=e)
obj.fetch_errors.append({"path": url, "error": e})

return
Expand Down
Loading

0 comments on commit 7bae80a

Please sign in to comment.