From ad4bab9f33802196bd371a524d734e665be4794b Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk <77289967+wandb-zacharyblasczyk@users.noreply.github.com> Date: Thu, 15 Jun 2023 12:49:02 -0500 Subject: [PATCH] feat(launch-agent): Configuration changes (#12) Co-authored-by: Ben Sherman --- charts/launch-agent/Chart.yaml | 3 +- charts/launch-agent/README.md | 46 +++++++++++-------- charts/launch-agent/ci/basic-values.yaml | 21 +++++++++ charts/launch-agent/templates/configmap.yaml | 5 +- charts/launch-agent/templates/deployment.yaml | 17 ++++++- charts/launch-agent/templates/namespace.yaml | 6 ++- charts/launch-agent/templates/rbac.yaml | 20 +++++--- charts/launch-agent/templates/secret.yaml | 8 ++-- charts/launch-agent/templates/volcano.yaml | 2 +- charts/launch-agent/values.yaml | 13 ++++++ 10 files changed, 107 insertions(+), 34 deletions(-) diff --git a/charts/launch-agent/Chart.yaml b/charts/launch-agent/Chart.yaml index 34e4e11e..742f6820 100644 --- a/charts/launch-agent/Chart.yaml +++ b/charts/launch-agent/Chart.yaml @@ -1,8 +1,9 @@ apiVersion: v2 name: launch-agent +icon: https://em-content.zobj.net/thumbs/240/apple/354/rocket_1f680.png description: A Helm chart for running the W&B Launch Agent in Kubernetes type: application -version: 0.5.0 +version: 0.6.0 maintainers: - name: wandb email: support@wandb.com diff --git a/charts/launch-agent/README.md b/charts/launch-agent/README.md index de7c005c..4609b454 100644 --- a/charts/launch-agent/README.md +++ b/charts/launch-agent/README.md @@ -4,33 +4,43 @@ This chart deploys the W&B Launch Agent to your Kubernetes cluster. The launch agent is a Kubernetes Deployment that runs a container that connects to the W&B API and watches for new runs in one or more launch queues. When the agent pops a run off the queue(s), it will launch a Kubernetes Job to execute the run on the W&B user's behalf. 
-To deploy an agent, you will need to specify the following values: +To deploy an agent, you will need to specify the following values in [`values.yaml`](values.yaml): - `agent.apiKey`: Your W&B API key - `launchConfig`: The literal contents of a launch agent config file that will be used to configure the agent. See the [launch agent docs](https://docs.wandb.ai/guides/launch/run-agent) for more information. You will likely want to modify the variable `agent.resources.limits.{cpu,mem}`, which default to `1000m`, and `1Gi` respectively. -You can provide these values by modifying the contents of [`values.yaml`](values.yaml) or by passing them in as command line arguments to `helm install`, e.g. +By default, this chart will also install [volcano](https://volcano.sh) +- `volcano`: Set to `false` to disable volcano install (default: `true`) -By default, this chart will also install [volcano](https://volcano.sh), but this can be disabled by setting `volcano=false`. +You can modify the values directly in the `values.yaml` file or provide them as command line arguments when running `helm install`, for example: ```bash -helm install --set agent.apiKey= --set-file launchConfig= +helm install --set agent.apiKey= ``` -## Chart variables +Here is an example with a `values.yaml` -Below is a table describing chart variables, their type, whether the user is required to provide a value, the default value, and a description of how the variable is used. - -| Variables | Type | Required | Default | Description | -|--------|-----|------|--|-------| -| `agent.apiKey` | string | **Yes** | n/a | W&B API key to be used by the agent. | -| `agent.Image` | string | No | `wandb/launch-agent-dev:latest` | Container image for the agent. -| `agent.imagePullPolicy` | string | No | Always | Pull policy for the agent container image. -| `agent.resources` | object | No | Limit to 1 CPU, 1Gi RAM. 
| [Pod spec resources block](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the agent. -| `launchConfig` | string | **Yes** | n/a | Launch agent configuration file contents. This config will be mounted at `/home/launch_agent/.config/wandb` in the agent container. For more details on how this config is structured, see [these docs](https://docs.wandb.ai/guides/launch/run-agent). -| `gitCreds` | string | No | `null` | If set, the conents of this string will be stored in a k8s secret and then mounted in the agent container at `~/.git-credentials` and used to grant the agent permission to clone private repositories via https. For more information on what the contents of this file should look like, see the [official git documentation](https://git-scm.com/docs/git-credential-store#_storage_format). -| `volcano` | bool | No | `true` | Controls whether the volcano scheduler should be installed in your cluster along with the agent. Set to `false` to disable volcano install. -| `serviceAccount.annotations` | object | No | `{}` | Annotations to add to the service account created for the agent. -| `azureStorageAccessKey` | string | No | "" | Azure storage access key required for kaniko to acces build contexts in azure blob storage. +```bash +helm upgrade --create-namespace --install wandb-launch wandb/launch-agent -f ./values.yaml --namespace=wandb-launch +``` + +## Chart variables +The table below describes all the available variables in the chart: + +| Variable | Type | Required | Default | Description | +| ----------------------------- | -------------- | -------- | --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| `agent.labels` | object | No | {} | Labels that will be added to the agent deployment. | +| `agent.apiKey` | string | **Yes** | "" | W&B API key to be used by the agent. 
| +| `agent.image` | string | No | `wandb/launch-agent-dev:latest` | Container image for the agent. | +| `agent.imagePullPolicy` | string | No | `Always` | Pull policy for the agent container image. | +| `agent.resources` | object | No | Limit to 1 CPU, 1Gi RAM | Pod spec resources block for the agent. | +| `namespace` | string | No | `wandb` | The namespace to deploy the agent into. | +| `additionalTargetNamespaces` | list(string) | No | [`wandb`,`default`] | A list of namespaces the agent can run jobs in. | +| `baseUrl` | string | No | `https://api.wandb.ai` | URL of your W&B server api. | +| `launchConfig` | multiline string | **Yes** | `null` | This should be set to the literal contents of your launch agent config. | +| `volcano` | bool | No | `true` | Controls whether the volcano scheduler should be installed in your cluster along with the agent. Set to `false` to disable volcano installation. | +| `gitCreds` | multiline string | No | `null` | Contents of a git credentials file. | +| `serviceAccount.annotations` | object | No | `null` | Annotations for the wandb service account. | +| `azureStorageAccessKey` | string | No | "" | Azure storage access key required for kaniko to access build contexts in azure blob storage. 
| diff --git a/charts/launch-agent/ci/basic-values.yaml b/charts/launch-agent/ci/basic-values.yaml index c9e9ec08..ebffcee5 100644 --- a/charts/launch-agent/ci/basic-values.yaml +++ b/charts/launch-agent/ci/basic-values.yaml @@ -1,4 +1,25 @@ agent: apiKey: "" + image: wandb/launch-agent-dev:latest + imagePullPolicy: Always + resources: + limits: + cpu: 1000m + memory: 1Gi + +namespace: wandb + +additionalTargetNamespaces: + - default + - wandb + launchConfig: | queues: ["default"] + +volcano: true + +gitCreds: | + +serviceAccount: + annotations: + iam.gke.io/gcp-service-account: diff --git a/charts/launch-agent/templates/configmap.yaml b/charts/launch-agent/templates/configmap.yaml index 88363a3d..0ac9cb28 100644 --- a/charts/launch-agent/templates/configmap.yaml +++ b/charts/launch-agent/templates/configmap.yaml @@ -1,8 +1,11 @@ +--- apiVersion: v1 data: + wandb-base-url: {{ .Values.baseUrl }} launch-config.yaml: | {{ required "Please set launchConfig to the contents of your agent config file" .Values.launchConfig | nindent 4 }} kind: ConfigMap metadata: name: wandb-launch-configmap - namespace: wandb \ No newline at end of file + namespace: {{ .Values.namespace }} +... diff --git a/charts/launch-agent/templates/deployment.yaml b/charts/launch-agent/templates/deployment.yaml index 51e3a859..f2db41bb 100644 --- a/charts/launch-agent/templates/deployment.yaml +++ b/charts/launch-agent/templates/deployment.yaml @@ -1,10 +1,15 @@ +--- apiVersion: apps/v1 kind: Deployment metadata: name: launch-agent - namespace: wandb + namespace: {{ .Values.namespace }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . 
| sha256sum }} spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app: launch-agent @@ -41,6 +46,13 @@ spec: valueFrom: fieldRef: fieldPath: spec.serviceAccountName + - name: WANDB_BASE_URL + valueFrom: + configMapKeyRef: + name: wandb-launch-configmap + key: wandb-base-url + - name: CM_CHECKSUM + value: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} volumeMounts: - name: wandb-launch-config mountPath: /home/launch_agent/.config/wandb @@ -64,4 +76,5 @@ spec: - name: git-config secret: secretName: git-config - {{ end}} \ No newline at end of file + {{ end}} +... \ No newline at end of file diff --git a/charts/launch-agent/templates/namespace.yaml b/charts/launch-agent/templates/namespace.yaml index c0ddaf97..3ee2eaed 100644 --- a/charts/launch-agent/templates/namespace.yaml +++ b/charts/launch-agent/templates/namespace.yaml @@ -1,9 +1,11 @@ +--- apiVersion: v1 kind: Namespace metadata: - name: wandb + name: {{ .Values.namespace }} labels: pod-security.kubernetes.io/enforce: baseline pod-security.kubernetes.io/enforce-version: latest pod-security.kubernetes.io/warn: baseline - pod-security.kubernetes.io/warn-version: latest \ No newline at end of file + pod-security.kubernetes.io/warn-version: latest +... \ No newline at end of file diff --git a/charts/launch-agent/templates/rbac.yaml b/charts/launch-agent/templates/rbac.yaml index abf604e1..4f3ebd97 100644 --- a/charts/launch-agent/templates/rbac.yaml +++ b/charts/launch-agent/templates/rbac.yaml @@ -1,17 +1,19 @@ +--- apiVersion: v1 kind: ServiceAccount metadata: name: wandb-launch-serviceaccount - namespace: wandb + namespace: {{ .Values.namespace }} {{ if .Values.serviceAccount.annotations }} annotations: {{ toYaml .Values.serviceAccount.annotations | indent 4 }} {{ end }} +... 
--- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - namespace: wandb + namespace: {{ .Values.namespace }} name: wandb-launch-agent rules: - apiGroups: [""] @@ -20,6 +22,7 @@ rules: - apiGroups: ["batch"] resources: ["jobs", "jobs/status"] verbs: ["create", "get", "watch", "list", "update", "delete", "patch"] +... --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -40,26 +43,31 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: wandb-launch-role-binding - namespace: wandb + namespace: {{ .Values.namespace }} subjects: - kind: ServiceAccount name: wandb-launch-serviceaccount - namespace: wandb + namespace: {{ .Values.namespace }} roleRef: kind: Role name: wandb-launch-agent apiGroup: rbac.authorization.k8s.io +... +{{- $root := . -}} +{{- range $ns := append .Values.additionalTargetNamespaces .Values.namespace }} --- -# role binding to create ML jobs in another namespace (could use cluster role binding if we want to launch cluster wide) apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: wandb-launch-cluster-role-binding + namespace: {{ $ns }} subjects: - kind: ServiceAccount name: wandb-launch-serviceaccount - namespace: wandb + namespace: {{ $root.Values.namespace }} roleRef: kind: ClusterRole name: job-creator apiGroup: rbac.authorization.k8s.io +... +{{- end }} \ No newline at end of file diff --git a/charts/launch-agent/templates/secret.yaml b/charts/launch-agent/templates/secret.yaml index 666eef51..8018a852 100644 --- a/charts/launch-agent/templates/secret.yaml +++ b/charts/launch-agent/templates/secret.yaml @@ -1,25 +1,27 @@ +--- apiVersion: v1 kind: Secret metadata: name: wandb-api-key - namespace: wandb + namespace: {{ .Values.namespace }} type: kubernetes.io/basic-auth stringData: password: {{ required "Please set agent.apiKey to a W&B API key" .Values.agent.apiKey }} - +... 
{{- if .Values.gitCreds }} --- apiVersion: v1 kind: Secret metadata: name: git-creds - namespace: wandb + namespace: {{ .Values.namespace }} type: kubernetes.io/basic-auth stringData: .git-crededentials: {{ .Values.gitCreds }} .gitconfig: | [credential] helper = store +... {{ end }} {{ if .Values.azureStorageAccessKey }} diff --git a/charts/launch-agent/templates/volcano.yaml b/charts/launch-agent/templates/volcano.yaml index c2199e2d..db358e29 100644 --- a/charts/launch-agent/templates/volcano.yaml +++ b/charts/launch-agent/templates/volcano.yaml @@ -1,5 +1,5 @@ -# condition this whole file out if the user doesn't want to install the admission controller {{ if .Values.volcano }} +# condition this whole file out if the user doesn't want to install the admission controller apiVersion: v1 kind: Namespace metadata: diff --git a/charts/launch-agent/values.yaml b/charts/launch-agent/values.yaml index 842e77c0..3e513e48 100644 --- a/charts/launch-agent/values.yaml +++ b/charts/launch-agent/values.yaml @@ -12,6 +12,17 @@ agent: cpu: 1000m memory: 1Gi +# Namespace to deploy launch agent into +namespace: wandb + +# W&B api url (Set yours here) +baseUrl: https://api.wandb.ai + +# Additional target namespaces that the launch agent can deploy into +additionalTargetNamespaces: + - default + - wandb + # This should be set to the literal contents of your launch agent config. launchConfig: | @@ -26,6 +37,8 @@ gitCreds: | # Annotations for the wandb service account. Useful when setting up workload identity on gcp. serviceAccount: annotations: + iam.gke.io/gcp-service-account: + azure.workload.identity/client-id: # Set to access key for azure storage if using kaniko with azure. azureStorageAccessKey: ""