Skip to content

Commit

Permalink
Merge branch 'main' into issue-284
Browse files Browse the repository at this point in the history
  • Loading branch information
amanpruthi authored Sep 24, 2024
2 parents 3d2bf87 + 23c0b3f commit 8cd882c
Show file tree
Hide file tree
Showing 7 changed files with 314 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
dryrun.yaml
license.txt
test-values.yaml
.DS_Store
secret.*.yaml
284 changes: 284 additions & 0 deletions charts/operator-wandb/charts/app/templates/_deployment.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
{{/*
app.deployment renders the Deployment manifest for the app. It is shared by the
non-glue and glue deployments.

Besides the usual chart scope (.Values, .Release, .Chart, $.Template), the
template reads these keys from the context it is included with:
  .suffix                appended to the Deployment name
  .tolerations           optional; rendered as the pod tolerations
  .glueSingletonEnabled  rendered into the GLUE_ENABLED env var
  .onlyService           optional; when set, rendered into the ONLY_SERVICE env var
  .healthCheckEnabled    when truthy, adds liveness/readiness/startup probes and
                         a preStop sleep to the main container

Env values taken straight from values or helpers are piped through `quote` so
that boolean-, number- or null-looking strings still render as YAML strings.
*/}}
{{- define "app.deployment" -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "app.fullname" . }}{{ .suffix }}
  labels:
    {{- include "wandb.commonLabels" . | nindent 4 }}
    {{- include "app.commonLabels" . | nindent 4 }}
    {{- include "app.labels" . | nindent 4 }}
    {{- if .Values.deployment.labels }}
    {{- toYaml .Values.deployment.labels | nindent 4 }}
    {{- end }}
  annotations:
    {{- include "wandb.deploymentAnnotations" . | nindent 4 }}
    {{- if .Values.deployment.annotations }}
    {{- toYaml .Values.deployment.annotations | nindent 4 }}
    {{- end }}
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "wandb.selectorLabels" . | nindent 6 }}
      {{- include "app.labels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "wandb.podLabels" . | nindent 8 }}
        {{- include "app.commonLabels" . | nindent 8 }}
        {{- include "app.podLabels" . | nindent 8 }}
        {{- include "app.labels" . | nindent 8 }}
      annotations:
        {{- /* Hash of the rendered secrets.yaml, so the pod template changes whenever that file's output changes. */}}
        checksum/secret: {{ include (print $.Template.BasePath "/secrets.yaml") . | sha256sum }}
        {{- if .Values.pod.annotations }}
        {{- toYaml .Values.pod.annotations | nindent 8 }}
        {{- end }}
    spec:
      serviceAccountName: {{ include "app.serviceAccountName" . }}
      {{- if .tolerations }}
      tolerations:
        {{- toYaml .tolerations | nindent 8 }}
      {{- end }}
      {{- include "wandb.nodeSelector" . | nindent 6 }}
      {{- include "wandb.priorityClassName" . | nindent 6 }}
      {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }}
      terminationGracePeriodSeconds: 60
      initContainers:
        {{- /* Blocks pod start-up until a "SELECT 1" against the configured MySQL database succeeds, retrying every 2 seconds. */}}
        - name: init-db
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          env:
            - name: MYSQL_PORT
              value: "{{ include "wandb.mysql.port" . }}"
            - name: MYSQL_HOST
              value: "{{ include "wandb.mysql.host" . }}"
            - name: MYSQL_DATABASE
              value: "{{ include "wandb.mysql.database" . }}"
            - name: MYSQL_USER
              value: "{{ include "wandb.mysql.user" . }}"
            - name: MYSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "wandb.mysql.passwordSecret" . }}
                  key: MYSQL_PASSWORD
          {{- /* NOTE(review): the shell variables below are expanded unquoted; credentials containing whitespace or glob characters would presumably break this loop - confirm. */}}
          command: ['bash', '-c', "until mysql -h$MYSQL_HOST -u$MYSQL_USER -p$MYSQL_PASSWORD -D$MYSQL_DATABASE -P$MYSQL_PORT --execute=\"SELECT 1\"; do echo waiting for db; sleep 2; done"]
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          volumeMounts:
            {{- if ne (include "wandb.redis.caCert" .) "" }}
            - name: {{ include "app.fullname" . }}-redis-ca
              mountPath: /etc/ssl/certs/redis_ca.pem
              subPath: redis_ca.pem
            {{- end }}
            {{- /* One mount per entry in global.customCACerts, each exposed as customCA<index>.crt. */}}
            {{- range $index, $v := .Values.global.customCACerts }}
            - name: wandb-ca-certs
              mountPath: /usr/local/share/ca-certificates/customCA{{$index}}.crt
              subPath: customCA{{$index}}.crt
            {{- end }}
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
            - name: prometheus
              containerPort: 8181
              protocol: TCP
            - name: gorilla-statsd
              containerPort: 8125
              protocol: TCP
          env:
            - name: GLUE_ENABLED
              value: "{{ .glueSingletonEnabled }}"
            {{- if .onlyService }}
            - name: ONLY_SERVICE
              value: {{ .onlyService | quote }}
            {{- end }}
            - name: HOST
              value: "{{ .Values.global.host }}"
            {{- if .Values.extraCors }}
            - name: GORILLA_CORS_ORIGINS
              value: "{{ join "," .Values.extraCors }}"
            {{- end }}
            - name: MYSQL_PORT
              value: "{{ include "wandb.mysql.port" . }}"
            - name: MYSQL_HOST
              value: "{{ include "wandb.mysql.host" . }}"
            - name: MYSQL_DATABASE
              value: "{{ include "wandb.mysql.database" . }}"
            - name: MYSQL_USER
              value: "{{ include "wandb.mysql.user" . }}"
            - name: MYSQL_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "wandb.mysql.passwordSecret" . }}
                  key: MYSQL_PASSWORD
            {{- /* Connection URL assembled from the MYSQL_* variables above via $(VAR) references. */}}
            - name: MYSQL
              value: "mysql://$(MYSQL_USER):$(MYSQL_PASSWORD)@$(MYSQL_HOST):$(MYSQL_PORT)/$(MYSQL_DATABASE)"
            - name: WEAVE_SERVICE
              value: "{{ .Release.Name }}-weave:9994"
            - name: PARQUET_HOST
              value: "http://{{ .Release.Name }}-parquet:8087"
            - name: PARQUET_ENABLED
              value: "true"
            {{- if index .Values.global "weave-trace" "enabled" }}
            - name: WEAVE_TRACES_ENABLED
              value: "true"
            {{- end }}
            {{- if ne (include "wandb.redis.password" .) "" }}
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "wandb.redis.passwordSecret" . }}
                  key: REDIS_PASSWORD
            {{- end }}
            - name: REDIS_PORT
              value: "{{ include "wandb.redis.port" . }}"
            - name: REDIS_HOST
              value: "{{ include "wandb.redis.host" . }}"
            - name: REDIS
              value: "{{ include "app.redis" . | trim }}"
            - name: SLACK_CLIENT_ID
              value: {{ .Values.global.slack.clientId | quote }}
            - name: SLACK_SECRET
              valueFrom:
                secretKeyRef:
                  name: {{ include "app.fullname" . }}-config
                  key: SLACK_SECRET
                  optional: true
            {{- if ne .Values.global.email.smtp.host "" }}
            - name: GORILLA_EMAIL_SINK
              value: "smtp://{{ .Values.global.email.smtp.user }}:{{ .Values.global.email.smtp.password }}@{{ .Values.global.email.smtp.host }}:{{ .Values.global.email.smtp.port }}"
            {{- end }}
            {{- /* LICENSE and GORILLA_LICENSE both read the same LICENSE key of the <fullname>-config secret. */}}
            - name: LICENSE
              valueFrom:
                secretKeyRef:
                  name: {{ include "app.fullname" . }}-config
                  key: LICENSE
                  optional: true
            - name: GORILLA_LICENSE
              valueFrom:
                secretKeyRef:
                  name: {{ include "app.fullname" . }}-config
                  key: LICENSE
                  optional: true
            {{- if ne .Values.global.auth.oidc.clientId "" }}
            {{- /* NOTE(review): the OIDC client secret is rendered as a literal env value here, not via a secretKeyRef. */}}
            - name: OIDC_CLIENT_ID
              value: {{ .Values.global.auth.oidc.clientId | quote }}
            - name: OIDC_AUTH_METHOD
              value: {{ .Values.global.auth.oidc.authMethod | quote }}
            - name: OIDC_ISSUER
              value: {{ .Values.global.auth.oidc.issuer | quote }}
            - name: OIDC_CLIENT_SECRET
              value: {{ .Values.global.auth.oidc.secret | quote }}
            {{- end }}
            - name: GORILLA_SESSION_LENGTH
              value: "{{ .Values.global.auth.sessionLengthHours }}h"
            {{- /* The statsd listener settings are only emitted when global.observability.mode is "otel"; an unset mode is treated as "custom". */}}
            {{- if and .Values.global .Values.global.observability }}
            {{- if eq (default "custom" .Values.global.observability.mode) "otel" }}
            - name: GORILLA_STATSD_PORT
              value: "8125"
            - name: GORILLA_STATSD_HOST
              value: "0.0.0.0"
            {{- end }}
            {{- end }}
            - name: BUCKET
              value: "{{ include "app.bucket" . }}"
            {{- /* Region and KMS key fall back to global.defaultBucket when global.bucket leaves them empty. */}}
            - name: AWS_REGION
              value: {{ .Values.global.bucket.region | default .Values.global.defaultBucket.region | quote }}
            - name: AWS_S3_KMS_ID
              value: "{{ .Values.global.bucket.kmsKey | default .Values.global.defaultBucket.kmsKey }}"
            - name: OPERATOR_ENABLED
              value: 'true'
            - name: LOGGING_ENABLED
              value: 'true'
            - name: AZURE_STORAGE_KEY
              valueFrom:
                secretKeyRef:
                  name: "{{ include "wandb.bucket.secret" . }}"
                  key: ACCESS_KEY
                  optional: true
            - name: GORILLA_CUSTOMER_SECRET_STORE_K8S_CONFIG_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: G_HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: BANNERS
              value: {{ toJson .Values.global.banners | quote }}
            {{- if ne .Values.traceRatio 0.0 }}
            - name: GORILLA_TRACER
              value: "otlp+grpc://{{ .Release.Name }}-otel-daemonset:4317?trace_ratio={{ .Values.traceRatio }}"
            {{- end }}
            - name: KAFKA_BROKER_HOST
              value: "{{ include "wandb.kafka.brokerHost" . }}"
            - name: KAFKA_BROKER_PORT
              value: "{{ include "wandb.kafka.brokerPort" . }}"
            - name: KAFKA_CLIENT_USER
              value: "{{ include "wandb.kafka.user" . }}"
            - name: KAFKA_CLIENT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "wandb.kafka.passwordSecret" . }}
                  key: KAFKA_CLIENT_PASSWORD
            - name: KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE
              value: {{ include "wandb.kafka.runUpdatesShadowTopic" . | quote }}
            - name: KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS
              value: "{{ include "wandb.kafka.runUpdatesShadowNumPartitions" .}}"
            - name: OVERFLOW_BUCKET_ADDR
              value: "{{ include "app.bucket" .}}"
            {{- /* JSON document built from the KAFKA_* and OVERFLOW_BUCKET_ADDR variables above via $(VAR) references. */}}
            - name: GORILLA_RUN_UPDATE_SHADOW_QUEUE
              value: >
                {
                  "overflow-bucket": {
                    "store": "$(OVERFLOW_BUCKET_ADDR)",
                    "name": "wandb",
                    "prefix": "wandb-overflow"
                  },
                  "addr": "kafka://$(KAFKA_CLIENT_USER):$(KAFKA_CLIENT_PASSWORD)@$(KAFKA_BROKER_HOST):$(KAFKA_BROKER_PORT)/$(KAFKA_TOPIC_RUN_UPDATE_SHADOW_QUEUE)?producer_batch_bytes=1048576&num_partitions=$(KAFKA_RUN_UPDATE_SHADOW_QUEUE_NUM_PARTITIONS)"
                }
            {{- include "app.extraEnv" (dict "global" $.Values.global "local" .Values) | nindent 12 }}
            {{- /* NOTE(review): rendered at indent 12, i.e. at env-list-item depth; if this helper emits a container-level key such as envFrom it would need nindent 10 - confirm against the helper. */}}
            {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 12 }}
          {{- if .healthCheckEnabled }}
          livenessProbe:
            httpGet:
              path: /healthz
              port: http
          readinessProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 20
            periodSeconds: 5
          startupProbe:
            httpGet:
              path: /ready
              port: http
            initialDelaySeconds: 20
            periodSeconds: 5
            failureThreshold: 120
          lifecycle:
            preStop:
              exec:
                command: ["sleep", "25"]
          {{- end }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      volumes:
        {{- if ne (include "wandb.redis.caCert" .) "" }}
        {{- /* The Redis CA is read from the REDIS_CA_CERT key of the same secret that holds the Redis password. */}}
        - name: {{ include "app.fullname" . }}-redis-ca
          secret:
            secretName: "{{ include "wandb.redis.passwordSecret" . }}"
            items:
              - key: REDIS_CA_CERT
                path: redis_ca.pem
        {{- end }}
        {{- if .Values.global.customCACerts }}
        - name: wandb-ca-certs
          configMap:
            name: {{ include "wandb.fullname" . }}-ca-certs
        {{- end }}
{{- end }}
4 changes: 2 additions & 2 deletions charts/operator-wandb/charts/app/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
Selector labels
*/}}
{{- define "app.selectorLabels" -}}
{{- /*
Selector labels for the app. Both values carry the optional .suffix from the
calling context. Each key is emitted exactly once; the stale un-suffixed pair
was removed because duplicate mapping keys are invalid YAML and the later
(suffixed) pair was the one taking effect anyway.
*/ -}}
app.kubernetes.io/name: {{ include "app.name" . }}{{ .suffix }}
app.kubernetes.io/instance: {{ .Release.Name }}{{ .suffix }}
{{- end }}

{{/*
Expand Down
6 changes: 4 additions & 2 deletions charts/operator-wandb/charts/app/templates/hpa.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if and .Values.autoscaling.hpa.enabled .Values.glueSingleton.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
Expand All @@ -12,12 +13,13 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: {{ include "app.fullname" . }}
minReplicas: 1
maxReplicas: 1
minReplicas: {{ .Values.autoscaling.hpa.minReplicas }}
maxReplicas: {{ .Values.autoscaling.hpa.maxReplicas }}
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
{{- end }}
9 changes: 9 additions & 0 deletions charts/operator-wandb/charts/app/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ image:
pullPolicy: Always
# pullSecrets: []

# Feature flag read by templates/hpa.yaml: the HorizontalPodAutoscaler is only
# rendered when this and autoscaling.hpa.enabled are both true.
glueSingleton:
  enabled: false

# HorizontalPodAutoscaler settings consumed by templates/hpa.yaml.
autoscaling:
  hpa:
    # Must be true (together with glueSingleton.enabled) for the HPA to render.
    enabled: false
    # Rendered into the HPA's spec.minReplicas / spec.maxReplicas.
    minReplicas: 1
    maxReplicas: 1

# Tolerations for pod scheduling
tolerations: []

Expand Down
19 changes: 12 additions & 7 deletions charts/operator-wandb/local-development.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ az account set --subscription <Subscription_ID>
az aks get-credentials --resource-group <Resource_Group_Name> --name <AKS_Cluster_Name>
```


#### GCP

Authenticate with the Google Cloud SDK:
Expand All @@ -56,13 +55,13 @@ cd helm-charts
Extract the current values from the deployed Helm chart and scale down the `wandb-controller-manager` deployment:

```bash
helm get values wandb > operator-spec.yaml
helm get values wandb > secret.operator-spec.yaml
kubectl scale --replicas=0 deployment -n wandb wandb-controller-manager
```

### 4. Develop and Test Your Changes

After extracting the current chart values into `operator-spec.yaml`, you can start making your changes to the chart or the operator specifications.
After extracting the current chart values into `secret.operator-spec.yaml`, you can start making your changes to the chart or the operator specifications.

#### Building Dependencies

Expand All @@ -77,9 +76,15 @@ helm dependency build ./charts/operator-wandb
To preview your changes, render the chart locally with `helm template`; to apply them, upgrade the Helm release with your modified specifications:

```bash
# Helm template command
helm template wandb \
./charts/operator-wandb -f ./secret.operator-spec.yaml > secret.template.yaml

# Helm upgrade command
helm upgrade \
--install wandb \
./charts/operator-wandb -f ./operator-spec.yaml
./charts/operator-wandb -f ./secret.operator-spec.yaml

```

### 5. Finalizing Development
Expand All @@ -89,9 +94,9 @@ After completing your development work:
1. Increment the version in your Helm chart's `Chart.yaml`, e.g., to `0.10.43`.
2. Scale the `wandb-controller-manager` deployment back up:

```bash
kubectl scale --replicas=1 deployment -n wandb wandb-controller-manager
```
```bash
kubectl scale --replicas=1 deployment -n wandb wandb-controller-manager
```

## Contributing

Expand Down
Loading

0 comments on commit 8cd882c

Please sign in to comment.