Skip to content

Commit

Permalink
Nopo11y multicluster support (#10)
Browse files Browse the repository at this point in the history
* Added support for multicluster

* Added path key in values

* Update defaultSLOs.yaml

Corrected the syntax of the error query for the availability SLO

* Updated latency SLO using nginx ingress metrics to accept latency value in ms

---------

Co-authored-by: Shehbaz Pathan (Consultant) <Shehbaz.Pathan@ril.com>
  • Loading branch information
shehbaz-pathan and Shehbaz Pathan (Consultant) authored Jun 4, 2024
1 parent 58f040f commit 6eeb7b5
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 35 deletions.
2 changes: 1 addition & 1 deletion charts/nopo11y/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ version: 1.0.2
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.0.1"
appVersion: "1.1.0"
9 changes: 5 additions & 4 deletions charts/nopo11y/templates/defaultAlerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ metadata:
name: {{ include "app.label" . }}-default-alert-rules
labels:
release: {{ .Values.prometheusReleaseLabel }}
managedby: nopo11y
spec:
groups:
- name: {{ include "app.label" . }}-default-alert-rules
rules:
{{- if .Values.istioMetrics.enabled }}
- alert: {{ include "app.label" . }}High5xxErrorRate
expr: sum(rate(istio_requests_total{app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate5xx }}
expr: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"5.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate5xx }}
annotations:
description: {{ include "app.label" . }} service is experiencing high 5xx errors rate from last 5 minutes.
summary: {{ include "app.label" . }} service is experiencing high 5xx error rate.
Expand All @@ -22,7 +23,7 @@ spec:
labels:
severity: critical
- alert: {{ include "app.label" . }}High4xxErrorRate
expr: sum(rate(istio_requests_total{app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate4xx }}
expr: sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}", response_code=~"4.."}[5m])) by (instance) / sum(rate(istio_requests_total{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}app="{{ include "app.label" . }}", destination_app=~"{{ include "app.label" . }}"}[5m])) by (instance) * 100 > {{ .Values.errorRate4xx }}
for: 5m
annotations:
{{- if .Values.grafanaURL }}
Expand All @@ -35,7 +36,7 @@ spec:
{{- end }}
{{- if .Values.nginxIngressMetrics.enabled }}
- alert: {{ include "app.label" . }}IngressHigh5xxErrorRate
expr: sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"5.."}[5m])) / sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}"}[5m])) * 100 > {{ .Values.errorRate5xx }}
expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"5..", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) * 100 > {{ .Values.errorRate5xx }}
annotations:
description: {{ include "app.label" . }} service is experiencing high 5xx errors rate from last 5 minutes.
summary: {{ include "app.label" . }} is experiencing high 5xx error rate.
Expand All @@ -45,7 +46,7 @@ spec:
labels:
severity: critical
- alert: {{ include "app.label" . }}IngressHigh4xxErrorRate
expr: sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"4.."}[5m])) / sum(rate(nginx_ingress_controller_requests{ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}"}[5m])) * 100 > {{ .Values.rrorRate4xx }}
expr: sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}",status=~"4..", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) / sum(rate(nginx_ingress_controller_requests{ {{- if hasKey .Values "cluster" }}cluster="{{ .Values.cluster }}", {{- end }}ingress=~"{{ .Values.nginxIngressMetrics.ingressName }}", path="{{ .Values.nginxIngressMetrics.path }}"}[5m])) * 100 > {{ .Values.errorRate4xx }}
for: 10m
annotations:
description: {{ include "app.label" . }} service is experiencing high 4xx errors rate from last 5 minutes.
Expand Down
Loading

0 comments on commit 6eeb7b5

Please sign in to comment.