feat: Make wandb operator available on OperatorHub (#32)

wandb · Oct 21, 2024 · 1a59dab · 1a59dab
1 parent 878ed6c
commit 1a59dab
Show file tree

Hide file tree

Showing 13 changed files with 900 additions and 0 deletions.
diff --git a/config/manifests/bases/operator.clusterserviceversion.yaml b/config/manifests/bases/operator.clusterserviceversion.yaml
@@ -0,0 +1,51 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion  # base CSV describing the Weights & Biases operator to OLM
+metadata:
+  annotations:
+    alm-examples: '[]'  # no example custom resources are listed in this base file
+    capabilities: Basic Install
+  name: operator.v0.0.0  # NOTE(review): looks like a template value - confirm it is replaced at bundle time
+  namespace: placeholder  # NOTE(review): literal "placeholder" - confirm it is replaced at bundle time
+spec:
+  apiservicedefinitions: {}
+  customresourcedefinitions: {}  # empty here: this base file declares no owned CRDs
+  description: |-
+    The Weights and Biases Operator simplifies the management of machine learning experiments, tracking, and infrastructure. It integrates with Kubernetes to deploy and manage Weights and Biases instances.
+    Features:
+    - Experiment Tracking
+    - Hyperparameter Tuning
+    - Model Management
+    - Integration with popular ML frameworks
+  displayName: Weights & Biases Operator
+  icon:  # NOTE(review): icon data and media type are both empty - TODO confirm an icon is supplied elsewhere
+  - base64data: ""
+    mediatype: ""
+  install:  # no deployments and no install strategy are defined in this base file
+    spec:
+      deployments: null
+    strategy: ""
+  installModes:  # AllNamespaces is the only mode marked as supported
+  - supported: false
+    type: OwnNamespace
+  - supported: false
+    type: SingleNamespace
+  - supported: false
+    type: MultiNamespace
+  - supported: true
+    type: AllNamespaces
+  keywords:
+  - machine learning
+  - experiment tracking
+  - ML infrastructure
+  - weights and biases
+  links:
+  - name: Weights & Biases Documentation
+    url: https://docs.wandb.ai/
+  maintainers:
+  - email: abhinav.garg@domain.com  # NOTE(review): "domain.com" looks like a placeholder address - confirm
+    name: Abhinav Garg
+  maturity: alpha
+  provider:
+    name: Weights & Biases
+    url: https://wandb.ai
+  version: 0.0.0  # same 0.0.0 as the suffix of metadata.name
diff --git a/olm/Makefile b/olm/Makefile
@@ -0,0 +1,26 @@
+# Makefile for building and pushing the WandB Operator image
+
+# Define variables.
+# Each description sits on its own line: GNU make keeps the whitespace between
+# a value and a trailing `#` comment as part of the value, which would put
+# spaces inside the image reference handed to docker.
+
+# Name of the Docker image
+IMAGE ?= wandb-operator
+# Docker registry URL
+QUAY_DOCKER_REGISTRY ?= quay.io
+# Project name in the registry
+QUAY_PROJECT ?= wandb_tools
+# Default version for the image
+VERSION ?= latest
+# Default branch for tagging
+BRANCH ?= main
+
+# Targets
+
+# Build the Docker image from olm-catalog/bundle.Dockerfile, tag it with
+# VERSION, then add a second tag named after BRANCH.
+build-image:
+	docker build -t $(QUAY_DOCKER_REGISTRY)/$(QUAY_PROJECT)/$(IMAGE):$(VERSION) \
+	-f olm-catalog/bundle.Dockerfile olm-catalog/
+
+	docker tag $(QUAY_DOCKER_REGISTRY)/$(QUAY_PROJECT)/$(IMAGE):$(VERSION) \
+	$(QUAY_DOCKER_REGISTRY)/$(QUAY_PROJECT)/$(IMAGE):$(BRANCH)
+
+# Build and push the Docker image to the registry.
+# Depends on build-image so both tags exist locally before they are pushed.
+build-push: build-image
+	docker push $(QUAY_DOCKER_REGISTRY)/$(QUAY_PROJECT)/$(IMAGE):$(VERSION)
+	docker push $(QUAY_DOCKER_REGISTRY)/$(QUAY_PROJECT)/$(IMAGE):$(BRANCH)
+
+# Specify that these targets do not represent files
+.PHONY: build-image build-push
diff --git a/olm/docs/OLM_Bundle.md b/olm/docs/OLM_Bundle.md
@@ -0,0 +1,134 @@
+
+# OLM Bundle Overview
+
+Operator Lifecycle Manager (OLM) has updated the method for storing operator bundles. Bundles are now packaged as container images, which include operator manifests and associated metadata. These images are compliant with OCI (Open Container Initiative) specifications, enabling them to be stored and pulled from any OCI-compliant container registry.
+
+The operator bundle image is designed as a scratch-based (non-runnable) container image. This bundle is utilized by OLM to install operators in OLM-enabled clusters, ensuring a streamlined and automated deployment process.
+
+The directory structure for an operator bundle is as follows:
+
+```
+$ tree bundle
+
+bundle
+├── ci.yaml
+├── manifests
+│   ├── apps.wandb.com_weightsandbiases.yaml
+│   ├── wandb-operator-manager_rbac.authorization.k8s.io_v1_clusterrole.yaml
+│   ├── wandb-operator-manager_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml
+│   └── wandb-operator.clusterserviceversion.yaml
+├── metadata
+│   └── annotations.yaml
+└── tests
+    └── scorecard
+        └── config.yaml
+
+```
+
+Each operator bundle must include a Cluster Service Version (CSV) file. Bundle metadata is stored in `bundle/metadata/annotations.yaml`, which provides essential information about the specific version of the operator available in the registry.
+
+Example content of `annotations.yaml`:
+
+```
+$ cat metadata/annotations.yaml
+
+annotations:
+  com.redhat.openshift.versions: v4.12
+  # Core bundle annotations.
+  operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+  operators.operatorframework.io.bundle.manifests.v1: manifests/
+  operators.operatorframework.io.bundle.metadata.v1: metadata/
+  operators.operatorframework.io.bundle.package.v1: wandb-operator
+  operators.operatorframework.io.bundle.channels.v1: stable
+  operators.operatorframework.io.bundle.channel.default.v1: stable
+  operators.operatorframework.io.metrics.builder: operator-sdk-v1.37.0
+  operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+  operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+
+  # Annotations for testing.
+  operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+  operators.operatorframework.io.test.config.v1: tests/scorecard/
+```
+
+## Building the Operator Bundle Image
+
+You can create a bundle image using the following `Dockerfile`:
+
+```
+$ cat bundle.Dockerfile
+
+FROM scratch
+
+# Core bundle labels.
+LABEL operators.operatorframework.io.bundle.mediatype.v1=registry+v1
+LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/
+LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/
+LABEL operators.operatorframework.io.bundle.package.v1=wandb-operator
+LABEL operators.operatorframework.io.bundle.channels.v1=stable
+LABEL operators.operatorframework.io.bundle.channel.default.v1=stable
+LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.37.0
+LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1
+LABEL operators.operatorframework.io.metrics.project_layout=go.kubebuilder.io/v3
+
+# Labels for testing.
+LABEL operators.operatorframework.io.test.mediatype.v1=scorecard+v1
+LABEL operators.operatorframework.io.test.config.v1=tests/scorecard/
+
+# Copy files to locations specified by labels.
+COPY ./manifests /manifests/
+COPY ./metadata /metadata/
+COPY ./tests/scorecard /tests/scorecard/
+
+LABEL com.redhat.openshift.versions=v4.12
+```
+
+To build the image, use the following command; afterwards, push it to a public repository with `docker push quay.io/wandb_tools/wandb-operator:v1.0.0`:
+
+```
+docker build -f bundle.Dockerfile -t quay.io/wandb_tools/wandb-operator:v1.0.0 .
+```
+
+At this point, you have built the operator bundle image. To integrate it into Red Hat's certification pipeline, you can push the image for Red Hat verification. However, this image is not deployable as a CatalogSource on OpenShift yet.
+
+## Creating a CatalogSource for OpenShift
+
+To deploy the operator on OpenShift, you must create a `CatalogSource` from the bundle image. First, ensure that the `opm` tool is installed.
+
+Run the following command to build the index image that backs the `CatalogSource`:
+
+```
+opm index add --container-tool docker --bundles quay.io/wandb_tools/wandb-operator:v1.0.0  --tag quay.io/wandb_tools/wandb-operator-index:v1.0.0
+```
+
+This will generate an image that can be used as a `CatalogSource`: `quay.io/wandb_tools/wandb-operator-index:v1.0.0`.
+
+### Updating CSVs for New Versions
+
+If you want to replace an old CSV with a new one in your catalog, you can use the following command to include both bundles:
+
+```
+opm index add --container-tool=docker --bundles=quay.io/wandb_tools/wandb-operator:v1.0.0,quay.io/wandb_tools/wandb-operator:v1.0.1 --tag  quay.io/wandb_tools/wandb-operator-index:v1.0.1
+```
+
+This command creates an index image tagged `v1.0.1` that contains both bundles, so that the `v1.0.1` CSV can supersede the older `v1.0.0` CSV.
+
+## OLM Catalog Source Upgrade Chain
+
+When managing multiple versions of an operator, it is crucial for OLM to automatically upgrade to the latest version. To achieve this, the `CSV` must include a `replaces` field, indicating which previous CSV version it is replacing.
+
+Consider the following example where the initial version (`v1.0.0`) of the operator is created:
+
+```
+opm index add --container-tool docker --bundles quay.io/wandb_tools/wandb-operator:v1.0.0  --tag quay.io/wandb_tools/wandb-operator-index:v1.0.0
+```
+
+Once a new version (`v1.0.1`) is released, you can specify the `--from-index` option to ensure the upgrade chain links to the previous version:
+
+```
+opm index add --container-tool=docker --bundles=quay.io/wandb_tools/wandb-operator:v1.0.1 --from-index=quay.io/wandb_tools/wandb-operator-index:v1.0.0 --tag quay.io/wandb_tools/wandb-operator-index:v1.0.1
+
+```
+
+This ensures that `v1.0.1` will automatically replace `v1.0.0` when upgrading.
+
+You can continue this process for subsequent versions, and `opm` will manage the version upgrade chain via the `CSV` definitions.
diff --git a/olm/docs/OLM_Testing_Guide.md b/olm/docs/OLM_Testing_Guide.md
@@ -0,0 +1,79 @@
+# Testing Wandb on OLM
+
+## Introduction
+
+This guide is intended for new contributors who are beginning to work with Weights and Biases (Wandb) and are looking to test Wandb on the Operator Lifecycle Manager (OLM). 
+It provides a step-by-step walkthrough of each component within the OLM environment and demonstrates how to deploy Wandb using it.
+
+
+## Installing Wandb on OLM via CatalogSource
+
+A `CatalogSource` is a registry in OLM that hosts operators in the form of bundles. For internal releases, we provide a `CatalogSource` image that hosts the bundle required for OLM to deploy the operator.
+
+To create a `CatalogSource` for version `v4.14` in the `openshift-marketplace` namespace, use the following configuration (the manifest does not set `metadata.namespace`, so apply it with `-n openshift-marketplace`):
+
+```yaml
+apiVersion: operators.coreos.com/v1alpha1
+kind: CatalogSource
+metadata:
+  name: wandb-operator-catalog
+  labels:
+    app.kubernetes.io/part-of: wandb-operator
+spec:
+  sourceType: grpc
+  image: quay.io/wandb_tools/wandb-operator-index:<release_tag>
+  updateStrategy:
+    registryPoll:
+      interval: 5m
+```
+
+Note: If you create the above CatalogSource in the openshift-marketplace namespace, the operator will be available cluster-wide.
+If you create it in another namespace, it will only be available within that namespace.
+
+
+## Installing Wandb on OLM
+
+Once the CatalogSource is deployed, you should see a pod with the catalogsource name running, indicating that your operator is ready to be served. You can then navigate to the OpenShift UI and install the Weights and Biases operator by selecting it from the list of operators provided by your CatalogSource. Refer to the following image for guidance:
+
+![][operators]
+
+After selecting the operator, proceed with the installation of the Weights and Biases operator. Upon successful installation, you should see the following output:
+![][Successful Installation]
+
+Next, you can create a Weights and Biases custom resource (CR) from the UI, and it should be deployed successfully:
+![][Wandb Installation]
+
+## Debugging a Failed Wandb Install on OLM
+
+OLM follows a sequential process when installing an operator, which involves three key components. If any of these components fails, the installation will not proceed to the next stage.
+
+These are the three components:
+1. Subscription
+2. InstallPlan
+3. ClusterServiceVersion
+
+If your installation fails, first verify if the InstallPlan exists. If it does, it indicates that the Subscription is functioning correctly. Next, check if the ClusterServiceVersion (CSV) exists. If the CSV is missing, the failure occurred at the InstallPlan level. To diagnose the issue, describe the InstallPlan and check for errors.
+
+If the CSV exists, the failure occurred at the CSV level. In this case, describe the CSV and look for errors to identify the cause.
+
+If the operator installation succeeds but the Wandb custom resource (CR) fails, check the operator logs and the Wandb CR status for further debugging information.
+
+In short: if your install failed, first check whether the InstallPlan exists; if it does, the Subscription is
+fine. Then check whether the ClusterServiceVersion exists. If it does not, the failure happened at the
+InstallPlan level, so describe the InstallPlan and look for the error.
+
+
+[operators]: images/provider.png?raw=true "Community Operators"
+[Successful Installation]: images/InstallSuccess.png?raw=true "Successful Installation"
+[Wandb Installation]: images/WandbSuccess.png?raw=true "Wandb installed successfully"
+
+## OLM Integration Tests Locally
+
+You can install OLM on any Kubernetes cluster using the following command:
+
+```shell
+operator-sdk olm install
+```
+
+## OLM Scorecard Tests Locally
+OLM scorecard tests can be run locally to validate the functionality of your operator.
diff --git a/olm/olm-catalog/bundle.Dockerfile b/olm/olm-catalog/bundle.Dockerfile
@@ -0,0 +1,23 @@
+# Operator bundle image: built on scratch, it carries only the bundle's
+# manifests, metadata and scorecard configuration plus descriptive labels.
+FROM scratch
+
+# Core bundle labels: bundle format, in-image directories, package and channels.
+LABEL operators.operatorframework.io.bundle.mediatype.v1=registry+v1 \
+      operators.operatorframework.io.bundle.manifests.v1=manifests/ \
+      operators.operatorframework.io.bundle.metadata.v1=metadata/ \
+      operators.operatorframework.io.bundle.package.v1=wandb-operator \
+      operators.operatorframework.io.bundle.channels.v1=stable \
+      operators.operatorframework.io.bundle.channel.default.v1=stable
+
+# Metrics labels: builder, media type and project layout.
+LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.37.0 \
+      operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 \
+      operators.operatorframework.io.metrics.project_layout=go.kubebuilder.io/v3
+
+# Labels for testing: scorecard media type and config directory.
+LABEL operators.operatorframework.io.test.mediatype.v1=scorecard+v1 \
+      operators.operatorframework.io.test.config.v1=tests/scorecard/
+
+# Place each directory at the path named by the matching label above.
+COPY manifests /manifests/
+COPY metadata /metadata/
+COPY tests/scorecard /tests/scorecard/
+
+# OpenShift version label, kept as the final instruction.
+LABEL com.redhat.openshift.versions=v4.12
diff --git a/olm/olm-catalog/ci.yaml b/olm/olm-catalog/ci.yaml
@@ -0,0 +1,8 @@
+---
+# Use `replaces-mode` or `semver-mode`. Once you switch to `semver-mode`, there is no easy way back.
+updateGraph: replaces-mode  # the mode currently selected for this operator
+reviewers:  # presumably the usernames allowed to review changes to this operator - confirm
+  - danielpanzella
+  - jsbroks
+  - abhinavg6
+  - velotioaastha
diff --git a/olm/olm-catalog/manifests/apps.wandb.com_weightsandbiases.yaml b/olm/olm-catalog/manifests/apps.wandb.com_weightsandbiases.yaml
@@ -0,0 +1,71 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition  # defines the WeightsAndBiases custom resource in group apps.wandb.com
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.14.0  # presumably generated by controller-gen; hand edits may be overwritten - confirm
+  creationTimestamp: null
+  name: weightsandbiases.apps.wandb.com  # <plural>.<group>
+spec:
+  group: apps.wandb.com
+  names:
+    kind: WeightsAndBiases
+    listKind: WeightsAndBiasesList
+    plural: weightsandbiases
+    shortNames:
+    - wandb
+    singular: weightsandbiases  # same spelling as the plural
+  scope: Namespaced
+  versions:
+  - name: v1  # the only version defined; it is both served and the storage version
+    schema:
+      openAPIV3Schema:
+        description: WeightsAndBiases is the Schema for the weightsandbiases API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: WeightsAndBiasesSpec defines the desired state of WeightsAndBiases
+            properties:
+              chart:  # NOTE(review): description below is identical to the one on "values" - confirm it is intended
+                description: Unstructured values for rendering CDK8s Config.
+                type: object
+                x-kubernetes-preserve-unknown-fields: true  # free-form object: keys not listed in the schema are kept
+              values:
+                description: Unstructured values for rendering CDK8s Config.
+                type: object
+                x-kubernetes-preserve-unknown-fields: true  # free-form object: keys not listed in the schema are kept
+            type: object
+          status:
+            description: WeightsAndBiasesStatus defines the observed state of WeightsAndBiases
+            properties:
+              phase:
+                type: string
+              version:
+                type: string
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}  # enables the status subresource for this version
+status:  # empty status stanza; presumably emitted by the generator - confirm it is still needed
+  acceptedNames:
+    kind: ""
+    plural: ""
+  conditions: null
+  storedVersions: null