Skip to content

Commit

Permalink
add basic reconciler metrics (#689)
Browse files Browse the repository at this point in the history
* add basic reconciler metrics

* fix checkstyle

* add to deployments

* fix controller

* expose the metrics service

* fix ci
  • Loading branch information
freeznet authored Sep 19, 2023
1 parent b2f3a96 commit ec754f2
Show file tree
Hide file tree
Showing 14 changed files with 397 additions and 2 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -297,3 +297,9 @@ redhat-certificated-image-build:
redhat-certificated-image-push: ## Push the bundle image.
echo $(OPERATOR_IMG)
$(MAKE) image-push IMG=$(OPERATOR_IMG)

##@ Generate the metrics documentation
# Ensures docs/monitoring exists, then runs the ./pkg/monitoring/metricsdocs
# program and redirects its standard output into docs/monitoring/metrics.md.
# NOTE(review): mkdir uses an absolute path via $(shell pwd) while the redirect
# target is relative, so this presumably expects to be run from the repo root — confirm.
.PHONY: generate-metricsdocs
generate-metricsdocs:
mkdir -p $(shell pwd)/docs/monitoring
go run -ldflags="${LDFLAGS}" ./pkg/monitoring/metricsdocs > docs/monitoring/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ spec:
app.kubernetes.io/name: {{ template "function-mesh-operator.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: controller-manager
annotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "{{ .Values.controllerManager.metrics.port }}"
spec:
{{- if .Values.controllerManager.serviceAccount }}
serviceAccount: {{ .Values.controllerManager.serviceAccount }}
Expand All @@ -38,12 +42,15 @@ spec:
resources:
{{ toYaml .Values.controllerManager.resources | indent 12 }}
{{- end }}
{{- if .Values.admissionWebhook.enabled }}
ports:
{{- if .Values.admissionWebhook.enabled }}
- containerPort: 9443
name: webhook-server
protocol: TCP
{{- end}}
- containerPort: {{ .Values.controllerManager.metrics.port }}
name: http-metrics
protocol: TCP
command:
- /manager
args:
Expand All @@ -63,6 +70,18 @@ spec:
value: {{ .Values.admissionWebhook.enabled | quote }}
volumeMounts:
{{- include "function-mesh-operator.volumeMounts" . | nindent 8 }}
livenessProbe:
httpGet:
path: /healthz
port: {{ .Values.controllerManager.healthProbe.port }}
initialDelaySeconds: 15
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: {{ .Values.controllerManager.healthProbe.port }}
initialDelaySeconds: 5
periodSeconds: 10
{{- with .Values.controllerManager.nodeSelector }}
nodeSelector:
{{ toYaml . | indent 8 }}
Expand All @@ -79,4 +98,4 @@ spec:
priorityClassName: {{ .Values.controllerManager.priorityClassName }}
{{- end }}
volumes:
{{- include "function-mesh-operator.volumes" . | nindent 6 -}}
{{- include "function-mesh-operator.volumes" . | nindent 6 -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Service that exposes the controller manager's metrics port inside the
# release namespace under the port name "http-metrics".
apiVersion: v1
kind: Service
metadata:
name: function-mesh-controller-manager-metrics-service
namespace: {{ .Release.Namespace }}
labels:
{{- include "function-mesh-operator.labels" . | nindent 4 }}
spec:
ports:
# port and targetPort are both taken from the same chart value
# (controllerManager.metrics.port), so the Service port equals the target port.
- name: http-metrics
port: {{ .Values.controllerManager.metrics.port }}
protocol: TCP
targetPort: {{ .Values.controllerManager.metrics.port }}
selector:
# Matches pods by release instance and the controller-manager component label.
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: controller-manager
3 changes: 3 additions & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ patchesStrategicMerge:
# 'CERTMANAGER' needs to be enabled to use ca injection
- webhookcainjection_patch.yaml

# [PROMETHEUS] To enable Prometheus monitoring, uncomment all sections with 'PROMETHEUS'.
- manager_metrics_patch.yaml

# the following config is for teaching kustomize how to do var substitution
vars:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix.
Expand Down
19 changes: 19 additions & 0 deletions config/default/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Patch for the controller-manager Deployment in the "system" namespace.
# It adds prometheus.io/* annotations to the pod template and declares a
# named metrics port on the "manager" container.
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
metadata:
annotations:
# Annotation values are strings; the port is quoted for that reason.
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "8080"
spec:
containers:
- name: manager
ports:
# NOTE(review): 8080 is hard-coded here and in the annotation above; presumably it
# must match the manager's metrics bind address — confirm against main.go flags.
- containerPort: 8080
name: http-metrics
protocol: TCP
12 changes: 12 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,17 @@ spec:
requests:
cpu: 100m
memory: 20Mi
livenessProbe:
httpGet:
path: /healthz
port: 8000
initialDelaySeconds: 15
periodSeconds: 20
readinessProbe:
httpGet:
path: /readyz
port: 8000
initialDelaySeconds: 5
periodSeconds: 10
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
12 changes: 12 additions & 0 deletions controllers/function_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

v1 "k8s.io/api/batch/v1"
"k8s.io/client-go/rest"
Expand Down Expand Up @@ -63,6 +66,15 @@ type FunctionReconciler struct {
func (r *FunctionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("function", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("function", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("function", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
function := &v1alpha1.Function{}
err := r.Get(ctx, req.NamespacedName, function)
Expand Down
12 changes: 12 additions & 0 deletions controllers/sink_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

v1 "k8s.io/api/batch/v1"
"k8s.io/client-go/rest"
Expand Down Expand Up @@ -62,6 +65,15 @@ type SinkReconciler struct {
func (r *SinkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("sink", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("sink", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("sink", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
sink := &v1alpha1.Sink{}
err := r.Get(ctx, req.NamespacedName, sink)
Expand Down
12 changes: 12 additions & 0 deletions controllers/source_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ package controllers

import (
"context"
"time"

"github.com/streamnative/function-mesh/pkg/monitoring"

"github.com/go-logr/logr"
"github.com/streamnative/function-mesh/api/compute/v1alpha1"
Expand Down Expand Up @@ -62,6 +65,15 @@ type SourceReconciler struct {
func (r *SourceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
_ = r.Log.WithValues("source", req.NamespacedName)

startTime := time.Now()

defer func() {
monitoring.FunctionMeshControllerReconcileCount.WithLabelValues("source", req.NamespacedName.Name,
req.NamespacedName.Namespace).Inc()
monitoring.FunctionMeshControllerReconcileLatency.WithLabelValues("source", req.NamespacedName.Name,
req.NamespacedName.Namespace).Observe(float64(time.Since(startTime).Milliseconds()))
}()

// your logic here
source := &v1alpha1.Source{}
err := r.Get(ctx, req.NamespacedName, source)
Expand Down
13 changes: 13 additions & 0 deletions docs/monitoring/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Operator Metrics
This document aims to help users who are not familiar with the metrics exposed by this operator.
The metrics documentation is auto-generated by the utility tool "pkg/monitoring/metricsdocs" and reflects all of the metrics that are exposed by the operator.

## Operator Metrics List
### function_mesh_reconcile_count
Number of reconcile operations. Type: Counter.
### function_mesh_reconcile_latency
Latency of reconcile operations, bucket boundaries are 10ms, 100ms, 1s, 10s, 30s and 60s. Type: Histogram.
## Developing new metrics
After developing new metrics or changing old ones, please run "make generate-metricsdocs" to regenerate this document.

If you feel that the new metric doesn't follow these rules, please change "pkg/monitoring/metricsdocs" according to your needs.
15 changes: 15 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import (
"os"
"strconv"

"sigs.k8s.io/controller-runtime/pkg/healthz"

"github.com/streamnative/function-mesh/pkg/monitoring"

"github.com/go-logr/logr"
computev1alpha1 "github.com/streamnative/function-mesh/api/compute/v1alpha1"
"github.com/streamnative/function-mesh/controllers"
Expand Down Expand Up @@ -53,6 +57,8 @@ func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(computev1alpha1.AddToScheme(scheme))

monitoring.RegisterMetrics()
// +kubebuilder:scaffold:scheme
}

Expand Down Expand Up @@ -203,6 +209,15 @@ func main() {
}
// +kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
setupLog.Error(err, "problem running manager")
Expand Down
50 changes: 50 additions & 0 deletions manifests/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
funcConfig:
Expand Down Expand Up @@ -256,6 +258,11 @@ spec:
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -3623,6 +3630,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
golang:
properties:
go:
Expand Down Expand Up @@ -3817,6 +3826,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -6929,6 +6945,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
golang:
Expand Down Expand Up @@ -7053,6 +7071,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxReplicas:
format: int32
type: integer
Expand Down Expand Up @@ -10300,6 +10325,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
funcConfig:
Expand Down Expand Up @@ -10501,6 +10528,11 @@ spec:
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -13941,6 +13973,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
golang:
properties:
go:
Expand Down Expand Up @@ -14135,6 +14169,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxMessageRetry:
format: int32
type: integer
Expand Down Expand Up @@ -17320,6 +17361,8 @@ spec:
type: string
downloaderImage:
type: string
filebeatImage:
type: string
forwardSourceMessageProperty:
type: boolean
golang:
Expand Down Expand Up @@ -17444,6 +17487,13 @@ spec:
required:
- jar
type: object
logTopic:
type: string
logTopicAgent:
enum:
- runtime
- sidecar
type: string
maxReplicas:
format: int32
type: integer
Expand Down
Loading

0 comments on commit ec754f2

Please sign in to comment.