Skip to content

Commit

Permalink
feat: added stackdriver support for gcp metrics (#141)
Browse files Browse the repository at this point in the history
Co-authored-by: Aditya Choudhari <aditya.choudhari@wandb.com>
  • Loading branch information
amanpruthi and adityachoudhari26 authored Jun 5, 2024
1 parent 66db270 commit f88e87b
Show file tree
Hide file tree
Showing 10 changed files with 411 additions and 3 deletions.
7 changes: 5 additions & 2 deletions charts/operator-wandb/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,8 @@ dependencies:
- name: nginx
repository: file://charts/nginx
version: 0.1.0
digest: sha256:c6f5bb38d125207b54902f5638451b55807074ea700ff8eea4ea0af2dc19e346
generated: "2024-05-17T16:55:33.068309+05:30"
- name: stackdriver
repository: file://charts/stackdriver
version: 0.1.0
digest: sha256:9a6c69506deb6969686d5b220a0692b53cfa29642e059bdf27c440c5d7086bdb
generated: "2024-06-05T11:04:02.508473-07:00"
6 changes: 5 additions & 1 deletion charts/operator-wandb/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: operator-wandb
description: A Helm chart for deploying W&B to Kubernetes
type: application
version: 0.13.9
version: 0.13.10
appVersion: 1.0.0
icon: https://wandb.ai/logo.svg

Expand Down Expand Up @@ -56,3 +56,7 @@ dependencies:
version: "*.*.*"
repository: file://charts/nginx
condition: nginx.install
- name: stackdriver
version: "*.*.*"
repository: file://charts/stackdriver
condition: stackdriver.install
23 changes: 23 additions & 0 deletions charts/operator-wandb/charts/stackdriver/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# OS-generated files
.DS_Store
# Common version-control directories and their ignore files
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup and temporary files left behind by editors and merge tools
*.swp
*.bak
*.tmp
*.orig
*~
# IDE and editor project files
.project
.idea/
*.tmproj
.vscode/
5 changes: 5 additions & 0 deletions charts/operator-wandb/charts/stackdriver/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Chart metadata for the stackdriver subchart of operator-wandb.
# The templates in this chart run `stackdriver_exporter` to expose GCP metrics.
apiVersion: v2
name: stackdriver
description: A Helm chart for deploying the Stackdriver exporter (GCP metrics) to Kubernetes
type: application
# Chart version; the parent chart depends on this subchart via file://charts/stackdriver.
version: 0.1.0
# Quoted so the value is always read as a string rather than a number.
appVersion: "0.15.0"
101 changes: 101 additions & 0 deletions charts/operator-wandb/charts/stackdriver/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{{/* vim: set filetype=mustache: */}}

{{/*
Short name of the chart: `.Values.nameOverride` when set, otherwise the chart
name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "stackdriver.name" -}}
{{- $chartName := .Values.nameOverride | default .Chart.Name -}}
{{- $chartName | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Fully qualified app name.
Resolution order:
  1. `.Values.fullnameOverride`, when set;
  2. the release name alone, when it already contains the chart name
     (or `.Values.nameOverride`);
  3. "<release name>-<chart name>" otherwise.
Every result is truncated to 63 characters (some Kubernetes name fields are
limited to this by the DNS naming spec) and stripped of a trailing "-".
*/}}
{{- define "stackdriver.fullname" -}}
{{- $override := .Values.fullnameOverride -}}
{{- if not $override -}}
  {{- $chartName := .Values.nameOverride | default .Chart.Name -}}
  {{- $release := .Release.Name -}}
  {{- if contains $chartName $release -}}
    {{- $release | trunc 63 | trimSuffix "-" -}}
  {{- else -}}
    {{- printf "%s-%s" $release $chartName | trunc 63 | trimSuffix "-" -}}
  {{- end -}}
{{- else -}}
  {{- $override | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}

{{/*
"<chart name>-<chart version>" string used by the chart label.
"+" is replaced with "_", and the result is truncated to 63 characters with
any trailing "-" removed.
*/}}
{{- define "stackdriver.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Common labels: the chart label, the selector labels, the app version (only
when the chart declares one), the W&B app-name label and the managing service.
*/}}
{{- define "stackdriver.labels" -}}
{{- $chartLabel := include "stackdriver.chart" . -}}
helm.sh/chart: {{ $chartLabel }}
{{ include "stackdriver.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
wandb.com/app-name: {{ $chartLabel }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels: the chart's short name and the release name.
*/}}
{{- define "stackdriver.selectorLabels" -}}
{{- $appName := include "stackdriver.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Name of the service account to use.
`.Values.serviceAccount.name` wins when set. Otherwise the fallback is the
chart's full name when `.Values.serviceAccount.create` is true, and "default"
when it is not.
*/}}
{{- define "stackdriver.serviceAccountName" -}}
{{- $fallback := "default" -}}
{{- if .Values.serviceAccount.create -}}
  {{- $fallback = include "stackdriver.fullname" . -}}
{{- end -}}
{{- .Values.serviceAccount.name | default $fallback -}}
{{- end -}}

{{/*
Returns the extraEnv keys and values to inject into containers, rendered as a
list of `name`/`value` entries.

Expects a dict with a `local` key (chart-specific values) and a `global` key
(global values); the `extraEnv` map is read from each.

Chart-specific (local) values take precedence over global values when the same
key is set in both: `merge` favours dictionaries that appear earlier in its
argument list, and the local map is passed before the global one.
*/}}
{{- define "stackdriver.extraEnv" -}}
{{- $localEnv := default (dict) .local.extraEnv -}}
{{- $globalEnv := default (dict) .global.extraEnv -}}
{{- /* Merge into a fresh dict so the caller's values are not mutated in place. */ -}}
{{- $allExtraEnv := merge (dict) $localEnv $globalEnv -}}
{{- range $key, $value := $allExtraEnv }}
- name: {{ $key }}
  value: {{ $value | quote }}
{{- end -}}
{{- end -}}

{{/*
Emits every entry of `.Values.common.labels` as a `key: "value"` line, for use
as labels shared across deployments and other objects.
Produces nothing when no common labels are configured.
*/}}
{{- define "stackdriver.commonLabels" -}}
{{- with .Values.common.labels -}}
{{- range $labelKey, $labelValue := . }}
{{ $labelKey }}: {{ quote $labelValue }}
{{- end }}
{{- end -}}
{{- end -}}

{{/*
Emits every entry of `.Values.pod.labels` as a `key: "value"` line, for use as
pod-level labels.
*/}}
{{- define "stackdriver.podLabels" -}}
{{- $podLabels := .Values.pod.labels -}}
{{- range $labelKey, $labelValue := $podLabels }}
{{ $labelKey }}: {{ quote $labelValue }}
{{- end }}
{{- end -}}
123 changes: 123 additions & 0 deletions charts/operator-wandb/charts/stackdriver/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{{/*
Deployment that runs `stackdriver_exporter`.

Rendered only when `.Values.enabled` is set. When either
`stackdriver.serviceAccountSecret` or `stackdriver.serviceAccountKey` is
provided, a secret volume is mounted at /etc/secrets/service-account/ and
GOOGLE_APPLICATION_CREDENTIALS points at credentials.json inside it.
*/}}
{{- if .Values.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "stackdriver.fullname" . }}
  labels:
    {{- include "wandb.commonLabels" . | nindent 4 }}
    {{- include "stackdriver.commonLabels" . | nindent 4 }}
    {{- include "stackdriver.labels" . | nindent 4 }}
    {{- if .Values.deployment.labels -}}
    {{- toYaml .Values.deployment.labels | nindent 4 }}
    {{- end }}
  annotations:
    {{- include "wandb.deploymentAnnotations" $ | nindent 4 }}
    {{- if .Values.deployment.annotations -}}
    {{- toYaml .Values.deployment.annotations | nindent 4 }}
    {{- end }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "wandb.selectorLabels" $ | nindent 6 }}
      {{- /*
      Only the stable selector labels belong here: spec.selector is immutable,
      and "stackdriver.labels" carries the chart and app versions, which would
      change the selector (and fail the upgrade) on every version bump.
      */}}
      {{- include "stackdriver.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "wandb.podLabels" . | nindent 8 }}
        {{- include "stackdriver.commonLabels" . | nindent 8 }}
        {{- include "stackdriver.podLabels" . | nindent 8 }}
        {{- include "stackdriver.labels" . | nindent 8 }}
      annotations:
        {{- if .Values.pod.annotations -}}
        {{- toYaml .Values.pod.annotations | nindent 8 }}
        {{- end }}
    spec:
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
          command: ["stackdriver_exporter"]
          {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }}
          volumeMounts:
            - name: stackdriver-service-account
              mountPath: /etc/secrets/service-account/
          {{- end }}
          args:
            - --google.project-id={{ .Values.stackdriver.projectId }}
            - --monitoring.metrics-interval={{ .Values.stackdriver.metrics.interval }}
            - --monitoring.metrics-offset={{ .Values.stackdriver.metrics.offset }}
            {{- /* Spaces are stripped so a "a, b, c" style prefix list becomes a single argument. */}}
            - --monitoring.metrics-type-prefixes={{ .Values.stackdriver.metrics.typePrefixes | replace " " "" }}
            {{- range .Values.stackdriver.metrics.filters }}
            - --monitoring.filters={{ . }}
            {{- end }}
            - --stackdriver.backoff-jitter={{ .Values.stackdriver.backoffJitter }}
            - --stackdriver.http-timeout={{ .Values.stackdriver.httpTimeout }}
            - --stackdriver.max-backoff={{ .Values.stackdriver.maxBackoff }}
            - --stackdriver.max-retries={{ .Values.stackdriver.maxRetries }}
            - --stackdriver.retry-statuses={{ .Values.stackdriver.retryStatuses }}
            - --web.listen-address={{ .Values.web.listenAddress }}
            - --web.telemetry-path={{ .Values.web.path }}
            {{- if .Values.stackdriver.dropDelegatedProjects }}
            - --monitoring.drop-delegated-projects
            {{- end }}
            {{- if .Values.stackdriver.metrics.ingestDelay }}
            - --monitoring.metrics-ingest-delay
            {{- end }}
            {{- if .Values.stackdriver.metrics.aggregateDeltas }}
            - --monitoring.aggregate-deltas
            - --monitoring.aggregate-deltas-ttl={{ .Values.stackdriver.metrics.aggregateDeltasTTL }}
            {{- end }}
            {{- /* Extra flags: entries with an empty value are skipped. */}}
            {{- if .Values.extraArgs }}
            {{- range $key, $value := .Values.extraArgs }}
            {{- if $value }}
            - --{{ $key }}={{ $value }}
            {{- end }}
            {{- end }}
            {{- end }}
          {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }}
          env:
            - name: GOOGLE_APPLICATION_CREDENTIALS
              value: /etc/secrets/service-account/credentials.json
          {{- end }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          ports:
            - containerPort: {{ .Values.service.httpPort }}
              name: http
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            timeoutSeconds: 10
      restartPolicy: {{ .Values.restartPolicy }}
      serviceAccountName: {{ include "stackdriver.serviceAccountName" . }}
      {{- /* Tolerations are chart values; reading `.tolerations` off the root context is always empty. */}}
      {{- if .Values.tolerations }}
      tolerations:
        {{- toYaml .Values.tolerations | nindent 8 }}
      {{- end }}
      {{- include "wandb.nodeSelector" . | nindent 6 }}
      {{- include "wandb.priorityClassName" . | nindent 6 }}
      {{- include "wandb.podSecurityContext" .Values.pod.securityContext | nindent 6 }}
      {{- if or .Values.stackdriver.serviceAccountSecret .Values.stackdriver.serviceAccountKey }}
      volumes:
        {{- if .Values.stackdriver.serviceAccountSecret }}
        {{- /* A pre-existing Secret; optionally project a single key as credentials.json. */}}
        - name: stackdriver-service-account
          secret:
            secretName: {{ .Values.stackdriver.serviceAccountSecret | quote }}
            {{- if .Values.stackdriver.serviceAccountSecretKey }}
            items:
              - key: {{ .Values.stackdriver.serviceAccountSecretKey | quote }}
                path: credentials.json
            {{- end }}
        {{- else }}
        {{- /*
        Uses this chart's own fullname helper; the previously referenced
        "stackdriver-exporter.fullname" template is not defined in this chart.
        NOTE(review): no template in view creates this Secret from
        `stackdriver.serviceAccountKey` - confirm one exists.
        */}}
        - name: stackdriver-service-account
          secret:
            secretName: {{ include "stackdriver.fullname" . }}
        {{- end }}
      {{- end }}
{{- end }}
23 changes: 23 additions & 0 deletions charts/operator-wandb/charts/stackdriver/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{/*
Service exposing the Stackdriver exporter pods on port 9255.
Rendered only when `.Values.enabled` is set, matching the Deployment, so a
disabled chart does not leave a Service without backing pods.
*/}}
{{- if .Values.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "stackdriver.fullname" . }}
  labels:
    {{- include "wandb.commonLabels" . | nindent 4 }}
    {{- include "stackdriver.labels" . | nindent 4 }}
    {{- include "stackdriver.commonLabels" . | nindent 4 }}
    {{- if .Values.service.labels -}}
    {{- toYaml .Values.service.labels | nindent 4 }}
    {{- end }}
  annotations:
    {{- if .Values.service.annotations -}}
    {{- toYaml .Values.service.annotations | nindent 4 }}
    {{- end }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: 9255
      {{- /*
      Target the container port by its name ("http" in the Deployment) so the
      Service keeps working when `service.httpPort` is not 9255.
      */}}
      targetPort: http
      protocol: TCP
      name: stackdriver
  selector:
    {{- /* Select on the stable name/instance labels only, not on version-bearing labels. */}}
    {{- include "stackdriver.selectorLabels" . | nindent 4 }}
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{/*
ServiceAccount used by the Stackdriver exporter pods.
Rendered only when the chart is enabled and `.Values.serviceAccount.create` is
set. When `create` is false, "stackdriver.serviceAccountName" resolves to
"default" or to an externally supplied name, which this chart must not try to
create.
*/}}
{{- if and .Values.enabled .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "stackdriver.serviceAccountName" . }}
  labels:
    {{- include "wandb.commonLabels" . | nindent 4 }}
    {{- include "stackdriver.commonLabels" . | nindent 4 }}
    {{- include "stackdriver.labels" . | nindent 4 }}
    {{- if .Values.serviceAccount.labels -}}
    {{- toYaml .Values.serviceAccount.labels | nindent 4 }}
    {{- end }}
  annotations:
    {{- if .Values.serviceAccount.annotations -}}
    {{- toYaml .Values.serviceAccount.annotations | nindent 4 }}
    {{- end }}
{{- end }}
Loading

0 comments on commit f88e87b

Please sign in to comment.