diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 9d02829b..5a7183bc 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.12.1 +version: 0.12.2 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/app/templates/deployment.yaml b/charts/operator-wandb/charts/app/templates/deployment.yaml index 356c5dae..66b82e7e 100644 --- a/charts/operator-wandb/charts/app/templates/deployment.yaml +++ b/charts/operator-wandb/charts/app/templates/deployment.yaml @@ -43,6 +43,9 @@ spec: {{- include "wandb.nodeSelector" . | nindent 6 }} {{- include "wandb.priorityClassName" . | nindent 6 }} {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + # Extend the pod's shutdown grace period from the default of 30s to 60s. + # This field is set on the pod template spec. + terminationGracePeriodSeconds: 60 initContainers: - name: init-db image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" @@ -217,11 +220,20 @@ spec: httpGet: path: /ready port: http + initialDelaySeconds: 20 + periodSeconds: 5 startupProbe: httpGet: path: /ready port: http + initialDelaySeconds: 20 + periodSeconds: 5 failureThreshold: 120 + # Sleep for 25s in the preStop hook before SIGTERM is delivered; the previous 5s sleep was not long enough. + lifecycle: + preStop: + exec: + command: ["sleep", "25"] resources: {{- toYaml .Values.resources | nindent 12 }} diff --git a/charts/operator-wandb/charts/console/templates/deployment.yaml b/charts/operator-wandb/charts/console/templates/deployment.yaml index f33a9007..f2dd6de9 100644 --- a/charts/operator-wandb/charts/console/templates/deployment.yaml +++ b/charts/operator-wandb/charts/console/templates/deployment.yaml @@ -42,6 +42,9 @@ spec: {{- include "wandb.nodeSelector" . 
| nindent 6 }} {{- include "wandb.priorityClassName" . | nindent 6 }} {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }} + # Extend the pod's shutdown grace period from the default of 30s to 60s. + # This field is set on the pod template spec. + terminationGracePeriodSeconds: 60 containers: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" @@ -76,11 +79,20 @@ spec: httpGet: path: /console/api/ready port: http + initialDelaySeconds: 20 + periodSeconds: 5 startupProbe: httpGet: path: /console/api/ready port: http + initialDelaySeconds: 20 + periodSeconds: 5 failureThreshold: 120 + # Sleep for 25s in the preStop hook before SIGTERM is delivered; the previous 5s sleep was not long enough. + lifecycle: + preStop: + exec: + command: ["sleep", "25"] resources: {{- toYaml .Values.resources | nindent 12 }}