From a9b9fa531d4947de877a315e6de424821df73419 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 4 Nov 2024 15:22:58 +0100 Subject: [PATCH] Add MinIO support with audbackend>=2.1.0 (#453) * Add MinIO support with audbackend>=2.1.0 * Add S3 and MinIO to the README * Improve docstrings in Repository * Update documentation * Update pyproject.toml --- README.rst | 10 +++++++--- audb/core/repository.py | 35 +++++++++++++++++------------------ docs/authentication.rst | 9 ++++++--- docs/overview.rst | 5 ++++- docs/publish.rst | 2 +- pyproject.toml | 2 +- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/README.rst b/README.rst index fd81709f..6f6e3bb2 100644 --- a/README.rst +++ b/README.rst @@ -13,7 +13,9 @@ and versioned for reproducibility. as it stores all databases in a `common format`_ and manages different versions of a database. Databases are stored in repositories -on local file systems +on local file systems, +MinIO_, +S3_, or Artifactory_ servers. You can request resampling or remixing of audio content @@ -42,11 +44,13 @@ If you want to cite **audb**, you can refer to our paper_: } -.. _common format: https://audeering.github.io/audformat/ .. _Artifactory: https://jfrog.com/artifactory/ +.. _common format: https://audeering.github.io/audformat/ .. _installation: https://audeering.github.io/audb/install.html -.. _quickstart: https://audeering.github.io/audb/quickstart.html +.. _MinIO: https://min.io .. _paper: https://arxiv.org/abs/2303.00645 +.. _quickstart: https://audeering.github.io/audb/quickstart.html +.. _S3: https://aws.amazon.com/s3/ .. 
badges images and links: diff --git a/audb/core/repository.py b/audb/core/repository.py index cfb01597..1086d229 100644 --- a/audb/core/repository.py +++ b/audb/core/repository.py @@ -17,7 +17,8 @@ class Repository: Args: name: repository name host: repository host - backend: repository backend + backend: repository backend, + for storage on S3 use the `"minio"` backend Examples: >>> Repository("data-local", "/data", "file-system") @@ -26,8 +27,9 @@ class Repository: """ backend_registry = { - "file-system": audbackend.backend.FileSystem, "artifactory": audbackend.backend.Artifactory, + "file-system": audbackend.backend.FileSystem, + "minio": audbackend.backend.Minio, } r"""Backend registry. @@ -73,9 +75,20 @@ def __repr__(self): # noqa: D105 def create_backend_interface(self) -> type[audbackend.interface.Base]: r"""Create backend interface to access repository. + It wraps an :class:`audbackend.interface.Versioned` interface + around it. + The files will then be stored + with the following structure on the backend + (shown by the example of version 1.0.0 of the emodb dataset):: + + emodb/1.0.0/db.yaml <-- header + emodb/1.0.0/db.zip <-- dependency table + emodb/attachment/1.0.0/... <-- attachments + emodb/media/1.0.0/... <-- media files + emodb/meta/1.0.0/... <-- tables + When :attr:`Repository.backend` equals ``artifactory``, - it creates an instance of :class:`audbackend.backend.Artifactory` - and wraps an :class:`audbackend.interface.Maven` interface + it wraps an :class:`audbackend.interface.Maven` interface around it. The files will then be stored with the following structure on the Artifactory backend @@ -87,20 +100,6 @@ def create_backend_interface(self) -> type[audbackend.interface.Base]: emodb/media/.../1.0.0/... <-- media files emodb/meta/.../1.0.0/... 
<-- tables - When :attr:`Repository.backend` equals ``file-system``, - it creates an instance of :class:`audbackend.backend.FileSystem` - and wraps an :class:`audbackend.interface.Versioned` interface - around it. - The files will then be stored - with the following structure on the Artifactory backend - (shown by the example of version 1.0.0 of the emodb dataset):: - - emodb/1.0.0/db.yaml <-- header - emodb/1.0.0/db.zip <-- dependency table - emodb/attachment/1.0.0/... <-- attachments - emodb/media/1.0.0/... <-- media files - emodb/meta/1.0.0/... <-- tables - The returned backend instance has not yet established a connection to the backend. To establish a connection, diff --git a/docs/authentication.rst b/docs/authentication.rst index c011dca1..9b967001 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1,7 +1,10 @@ Authentication ============== -Using Artifactory as backend -requires authentication. +Using Artifactory, +MinIO or S3 +as backend +might require authentication. For more information, -see :class:`audbackend.backend.Artifactory`. +see :class:`audbackend.backend.Artifactory` +and :class:`audbackend.backend.Minio` (for MinIO and S3). diff --git a/docs/overview.rst b/docs/overview.rst index 3af0b4d6..108c6210 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -31,6 +31,7 @@ to communicate with the underlying backend. At the moment, it supports to store the data in a folder on a local file system, +in a bucket on MinIO_ or S3_ storage, or inside a `Generic repository`_ on an `Artifactory server`_. @@ -96,6 +97,8 @@ the following operations are performed: .. graphviz:: pics/load.dot -.. _Generic repository: https://jfrog.com/help/r/jfrog-artifactory-documentation/repository-management .. _Artifactory server: https://jfrog.com/artifactory/ +.. _Generic repository: https://jfrog.com/help/r/jfrog-artifactory-documentation/repository-management .. 
_implements the required functions: https://github.com/audeering/audbackend/blob/edd23462799ae9052a43cdd045698f78e19dbcaf/audbackend/core/backend.py#L559-L659 +.. _MinIO: https://min.io +.. _S3: https://aws.amazon.com/s3/ diff --git a/docs/publish.rst b/docs/publish.rst index 7c88f1f5..4dc4f882 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -148,7 +148,7 @@ in the file ``db.parquet``. Note, that the structure of the folders used for versioning -depends on the backend, +:meth:`depends on the backend <audb.Repository.create_backend_interface>`, and differs slightly for an Artifactory backend. diff --git a/pyproject.toml b/pyproject.toml index 653f2e54..f73ba4fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] requires-python = '>=3.9' dependencies = [ - 'audbackend[artifactory] >=2.0.0', + 'audbackend[all] >=2.1.0', 'audeer >=2.1.0', 'audformat >=1.2.0', 'audiofile >=1.0.0',