From bb39cda0948263b7ce430a24333a6dca3846a24c Mon Sep 17 00:00:00 2001 From: Andrews Arokiam <87992092+andyi2it@users.noreply.github.com> Date: Sat, 7 Oct 2023 20:56:12 +0530 Subject: [PATCH] Allow disabling creation of the HPA in raw deployment mode (#3086) * Refactor autoscaler functionality into an interface. Add no-op autoscaler so users disable creation of autoscaler resources. Add AutoscalerClassNone feature with test cases Signed-off-by: jooho Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Rollback getHPAMetrics to set ScaleTarget,ScaleMetric Signed-off-by: jooho Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Fix merge conflict Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Fix autoscaler api group Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Fix autoscaler merge Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Fix autoscaling tests Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam * Updated hpa disable test. Signed-off-by: Andrews Arokiam * Replaced "AutoscalerClassNone" with "AutoscalerClassExternal" Signed-off-by: Andrews Arokiam * Fixed autoscalerClass 'external' go test. Signed-off-by: Andrews Arokiam --------- Signed-off-by: jooho Signed-off-by: Dan Sun Signed-off-by: Andrews Arokiam Co-authored-by: Curtis Maddalozzo Co-authored-by: jooho Co-authored-by: Dan Sun --- .../v1beta1/inference_service_validation.go | 2 + pkg/constants/constants.go | 4 +- .../inferenceservice/components/explainer.go | 6 +- .../inferenceservice/components/predictor.go | 6 +- .../components/transformer.go | 6 +- .../rawkube_controller_test.go | 349 +++++++++++++++++- .../autoscaler/autoscaler_reconciler.go | 46 ++- .../autoscaler/autoscaler_reconciler_test.go | 75 ++++ .../reconcilers/hpa/hpa_reconciler.go | 12 +- .../reconcilers/raw/raw_kube_reconciler.go | 2 +- 10 files changed, 468 insertions(+), 40 deletions(-) create mode 100644 pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler_test.go diff --git a/pkg/apis/serving/v1beta1/inference_service_validation.go b/pkg/apis/serving/v1beta1/inference_service_validation.go index f329741a20..89f30897b8 100644 --- a/pkg/apis/serving/v1beta1/inference_service_validation.go +++ b/pkg/apis/serving/v1beta1/inference_service_validation.go @@ -139,6 +139,8 @@ func validateInferenceServiceAutoscaler(isvc *InferenceService) error { } else { return nil } + case constants.AutoscalerClassExternal: + return nil default: return fmt.Errorf("unknown autoscaler class [%s]", class) } diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index baf55f157b..499eee8ea9 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -154,7 +154,8 @@ var ( // Autoscaler Class var ( - AutoscalerClassHPA AutoscalerClassType = "hpa" + AutoscalerClassHPA AutoscalerClassType = "hpa" + AutoscalerClassExternal AutoscalerClassType = "external" ) // Autoscaler Metrics @@ -170,6 +171,7 @@ var ( // Autoscaler Class Allowed List var AutoscalerAllowedClassList = []AutoscalerClassType{ AutoscalerClassHPA, + AutoscalerClassExternal, } // Autoscaler Metrics Allowed List diff --git a/pkg/controller/v1beta1/inferenceservice/components/explainer.go b/pkg/controller/v1beta1/inferenceservice/components/explainer.go index f8eaaf4d66..8ea0a80f94 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/explainer.go +++ b/pkg/controller/v1beta1/inferenceservice/components/explainer.go @@ -151,10 +151,8 @@ func (e *Explainer) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro return ctrl.Result{}, errors.Wrapf(err, "fails to set service owner reference for explainer") } //set autoscaler Controller - if r.Scaler.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA { - if err := controllerutil.SetControllerReference(isvc, r.Scaler.Autoscaler.HPA.HPA, e.scheme); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "fails to set HPA owner reference for explainer") - } + if err := r.Scaler.Autoscaler.SetControllerReferences(isvc, e.scheme); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "fails to set autoscaler owner references for explainer") } deployment, err := r.Reconcile() diff --git a/pkg/controller/v1beta1/inferenceservice/components/predictor.go b/pkg/controller/v1beta1/inferenceservice/components/predictor.go index dd2b358a19..5700e85658 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/predictor.go +++ b/pkg/controller/v1beta1/inferenceservice/components/predictor.go @@ -319,10 +319,8 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro return ctrl.Result{}, errors.Wrapf(err, "fails to set service owner reference for predictor") } //set autoscaler Controller - if r.Scaler.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA { - if err := controllerutil.SetControllerReference(isvc, r.Scaler.Autoscaler.HPA.HPA, p.scheme); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "fails to set HPA owner reference for predictor") - } + if err := r.Scaler.Autoscaler.SetControllerReferences(isvc, p.scheme); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "fails to set autoscaler owner references for predictor") } deployment, err := r.Reconcile() diff --git a/pkg/controller/v1beta1/inferenceservice/components/transformer.go b/pkg/controller/v1beta1/inferenceservice/components/transformer.go index c1ec83dcf5..df2557d662 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/transformer.go +++ b/pkg/controller/v1beta1/inferenceservice/components/transformer.go @@ -181,10 +181,8 @@ func (p *Transformer) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, er return ctrl.Result{}, errors.Wrapf(err, "fails to set service owner reference for transformer") } //set autoscaler Controller - if r.Scaler.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA { - if err := controllerutil.SetControllerReference(isvc, r.Scaler.Autoscaler.HPA.HPA, p.scheme); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "fails to set HPA owner reference for transformer") - } + if err := r.Scaler.Autoscaler.SetControllerReferences(isvc, p.scheme); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "fails to set autoscaler owner references for transformer") } deployment, err := r.Reconcile() diff --git a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go index e97e1ea74f..e1f1d46c7e 100644 --- a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go +++ b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go @@ -164,7 +164,7 @@ var _ = Describe("v1beta1 inference service controller", func() { StorageURI: &storageUri, RuntimeVersion: proto.String("1.14.0"), Container: v1.Container{ - Name: "kserve-container", + Name: constants.InferenceServiceContainerName, Resources: defaultResource, }, }, @@ -503,6 +503,353 @@ var _ = Describe("v1beta1 inference service controller", func() { } Expect(actualHPA.Spec).To(gomega.Equal(expectedHPA.Spec)) }) + It("Should have ingress/service/deployment created", func() { + By("By creating a new InferenceService with AutoscalerClassExternal") + // Create configmap + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + // Create ServingRuntime + servingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tf-serving-raw", + Namespace: "default", + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "tensorflow", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Containers: []v1.Container{ + { + Name: "kserve-container", + Image: "tensorflow/serving:1.14.0", + Command: []string{"/usr/bin/tensorflow_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + }, + Disabled: proto.Bool(false), + }, + } + k8sClient.Create(context.TODO(), servingRuntime) + defer k8sClient.Delete(context.TODO(), servingRuntime) + serviceName := "raw-foo-2" + var expectedRequest = reconcile.Request{NamespacedName: types.NamespacedName{Name: serviceName, Namespace: "default"}} + var serviceKey = expectedRequest.NamespacedName + var storageUri = "s3://test/mnist/export" + ctx := context.Background() + isvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + Tensorflow: &v1beta1.TFServingSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + isvc.DefaultInferenceService(nil, nil) + Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceService) + if err != nil { + return false + } + return true + }, timeout, interval).Should(BeTrue()) + + actualDeployment := &appsv1.Deployment{} + predictorDeploymentKey := types.NamespacedName{Name: constants.PredictorServiceName(serviceKey.Name), + Namespace: serviceKey.Namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorDeploymentKey, actualDeployment) }, timeout). + Should(Succeed()) + var replicas int32 = 1 + var revisionHistory int32 = 10 + var progressDeadlineSeconds int32 = 600 + var gracePeriod int64 = 30 + expectedDeployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: predictorDeploymentKey.Name, + Namespace: predictorDeploymentKey.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "isvc." + predictorDeploymentKey.Name, + }, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: predictorDeploymentKey.Name, + Namespace: "default", + Labels: map[string]string{ + "app": "isvc." + predictorDeploymentKey.Name, + constants.KServiceComponentLabel: constants.Predictor.String(), + constants.InferenceServicePodLabelKey: serviceName, + }, + Annotations: map[string]string{ + constants.StorageInitializerSourceUriInternalAnnotationKey: *isvc.Spec.Predictor.Model.StorageURI, + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + }, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: "tensorflow/serving:" + + *isvc.Spec.Predictor.Model.RuntimeVersion, + Name: constants.InferenceServiceContainerName, + Command: []string{v1beta1.TensorflowEntrypointCommand}, + Args: []string{ + "--port=" + v1beta1.TensorflowServingGRPCPort, + "--rest_api_port=" + v1beta1.TensorflowServingRestPort, + "--model_base_path=" + constants.DefaultModelLocalMountPath, + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + ReadinessProbe: &v1.Probe{ + ProbeHandler: v1.ProbeHandler{ + TCPSocket: &v1.TCPSocketAction{ + Port: intstr.IntOrString{ + IntVal: 8080, + }, + }, + }, + InitialDelaySeconds: 0, + TimeoutSeconds: 1, + PeriodSeconds: 10, + SuccessThreshold: 1, + FailureThreshold: 3, + }, + TerminationMessagePath: "/dev/termination-log", + TerminationMessagePolicy: "File", + ImagePullPolicy: "IfNotPresent", + }, + }, + SchedulerName: "default-scheduler", + RestartPolicy: "Always", + TerminationGracePeriodSeconds: &gracePeriod, + DNSPolicy: "ClusterFirst", + SecurityContext: &v1.PodSecurityContext{ + SELinuxOptions: nil, + WindowsOptions: nil, + RunAsUser: nil, + RunAsGroup: nil, + RunAsNonRoot: nil, + SupplementalGroups: nil, + FSGroup: nil, + Sysctls: nil, + FSGroupChangePolicy: nil, + SeccompProfile: nil, + }, + }, + }, + Strategy: appsv1.DeploymentStrategy{ + Type: "RollingUpdate", + RollingUpdate: &appsv1.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: 1, IntVal: 0, StrVal: "25%"}, + MaxSurge: &intstr.IntOrString{Type: 1, IntVal: 0, StrVal: "25%"}, + }, + }, + RevisionHistoryLimit: &revisionHistory, + ProgressDeadlineSeconds: &progressDeadlineSeconds, + }, + } + Expect(actualDeployment.Spec).To(gomega.Equal(expectedDeployment.Spec)) + + //check service + actualService := &v1.Service{} + predictorServiceKey := types.NamespacedName{Name: constants.PredictorServiceName(serviceKey.Name), + Namespace: serviceKey.Namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorServiceKey, actualService) }, timeout). + Should(Succeed()) + + expectedService := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: predictorServiceKey.Name, + Namespace: predictorServiceKey.Namespace, + }, + Spec: v1.ServiceSpec{ + Ports: []v1.ServicePort{ + { + Name: "raw-foo-2-predictor", + Protocol: "TCP", + Port: 80, + TargetPort: intstr.IntOrString{Type: 0, IntVal: 8080, StrVal: ""}, + }, + }, + Type: "ClusterIP", + SessionAffinity: "None", + Selector: map[string]string{ + "app": "isvc.raw-foo-2-predictor", + }, + }, + } + actualService.Spec.ClusterIP = "" + actualService.Spec.ClusterIPs = nil + actualService.Spec.IPFamilies = nil + actualService.Spec.IPFamilyPolicy = nil + actualService.Spec.InternalTrafficPolicy = nil + Expect(actualService.Spec).To(gomega.Equal(expectedService.Spec)) + + //check isvc status + updatedDeployment := actualDeployment.DeepCopy() + updatedDeployment.Status.Conditions = []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: v1.ConditionTrue, + }, + } + Expect(k8sClient.Status().Update(context.TODO(), updatedDeployment)).NotTo(gomega.HaveOccurred()) + + //check ingress + pathType := netv1.PathTypePrefix + actualIngress := &netv1.Ingress{} + predictorIngressKey := types.NamespacedName{Name: serviceKey.Name, + Namespace: serviceKey.Namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorIngressKey, actualIngress) }, timeout). + Should(Succeed()) + expectedIngress := netv1.Ingress{ + Spec: netv1.IngressSpec{ + Rules: []netv1.IngressRule{ + { + Host: "raw-foo-2-default.example.com", + IngressRuleValue: netv1.IngressRuleValue{ + HTTP: &netv1.HTTPIngressRuleValue{ + Paths: []netv1.HTTPIngressPath{ + { + Path: "/", + PathType: &pathType, + Backend: netv1.IngressBackend{ + Service: &netv1.IngressServiceBackend{ + Name: "raw-foo-2-predictor", + Port: netv1.ServiceBackendPort{ + Number: 80, + }, + }, + }, + }, + }, + }, + }, + }, + { + Host: "raw-foo-2-predictor-default.example.com", + IngressRuleValue: netv1.IngressRuleValue{ + HTTP: &netv1.HTTPIngressRuleValue{ + Paths: []netv1.HTTPIngressPath{ + { + Path: "/", + PathType: &pathType, + Backend: netv1.IngressBackend{ + Service: &netv1.IngressServiceBackend{ + Name: "raw-foo-2-predictor", + Port: netv1.ServiceBackendPort{ + Number: 80, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + } + Expect(actualIngress.Spec).To(gomega.Equal(expectedIngress.Spec)) + // verify if InferenceService status is updated + expectedIsvcStatus := v1beta1.InferenceServiceStatus{ + Status: duckv1.Status{ + Conditions: duckv1.Conditions{ + { + Type: v1beta1.IngressReady, + Status: "True", + }, + { + Type: v1beta1.PredictorReady, + Status: "True", + }, + { + Type: apis.ConditionReady, + Status: "True", + }, + }, + }, + URL: &apis.URL{ + Scheme: "http", + Host: "raw-foo-2-default.example.com", + }, + Address: &duckv1.Addressable{ + URL: &apis.URL{ + Scheme: "http", + Host: fmt.Sprintf("%s-predictor.%s.svc.cluster.local", serviceKey.Name, serviceKey.Namespace), + }, + }, + Components: map[v1beta1.ComponentType]v1beta1.ComponentStatusSpec{ + v1beta1.PredictorComponent: { + LatestCreatedRevision: "", + URL: &apis.URL{ + Scheme: "http", + Host: "raw-foo-2-predictor-default.example.com", + }, + }, + }, + ModelStatus: v1beta1.ModelStatus{ + TransitionStatus: "InProgress", + ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"}, + }, + } + Eventually(func() string { + isvc := &v1beta1.InferenceService{} + if err := k8sClient.Get(context.TODO(), serviceKey, isvc); err != nil { + return err.Error() + } + return cmp.Diff(&expectedIsvcStatus, &isvc.Status, cmpopts.IgnoreTypes(apis.VolatileTime{})) + }, timeout).Should(gomega.BeEmpty()) + + //check HPA is not created + actualHPA := &autoscalingv2.HorizontalPodAutoscaler{} + predictorHPAKey := types.NamespacedName{Name: constants.DefaultPredictorServiceName(serviceKey.Name), + Namespace: serviceKey.Namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorHPAKey, actualHPA) }, timeout). + Should(HaveOccurred()) + }) }) Context("When creating inference service with raw kube predictor and empty ingressClassName", func() { configs := map[string]string{ diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler.go index 0966b66f8f..365f69e9ec 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler.go @@ -17,11 +17,11 @@ limitations under the License. package autoscaler import ( - "github.com/pkg/errors" - + "fmt" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/kserve/kserve/pkg/constants" hpa "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/hpa" + autoscalingv2 "k8s.io/api/autoscaling/v2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -30,16 +30,28 @@ import ( var log = logf.Log.WithName("AutoscalerReconciler") -type Autoscaler struct { - AutoscalerClass constants.AutoscalerClassType - HPA *hpa.HPAReconciler +// Autoscaler Interface implemented by all autoscalers +type Autoscaler interface { + Reconcile() (*autoscalingv2.HorizontalPodAutoscaler, error) + SetControllerReferences(owner metav1.Object, scheme *runtime.Scheme) error +} + +// NoOpAutoscaler Autoscaler that does nothing. Can be used to disable creation of autoscaler resources. +type NoOpAutoscaler struct{} + +func (*NoOpAutoscaler) Reconcile() (*autoscalingv2.HorizontalPodAutoscaler, error) { + return nil, nil +} + +func (a *NoOpAutoscaler) SetControllerReferences(owner metav1.Object, scheme *runtime.Scheme) error { + return nil } // AutoscalerReconciler is the struct of Raw K8S Object type AutoscalerReconciler struct { client client.Client scheme *runtime.Scheme - Autoscaler *Autoscaler + Autoscaler Autoscaler componentExt *v1beta1.ComponentExtensionSpec } @@ -70,27 +82,21 @@ func getAutoscalerClass(metadata metav1.ObjectMeta) constants.AutoscalerClassTyp func createAutoscaler(client client.Client, scheme *runtime.Scheme, componentMeta metav1.ObjectMeta, - componentExt *v1beta1.ComponentExtensionSpec) (*Autoscaler, error) { - as := &Autoscaler{} + componentExt *v1beta1.ComponentExtensionSpec) (Autoscaler, error) { ac := getAutoscalerClass(componentMeta) - as.AutoscalerClass = ac switch ac { case constants.AutoscalerClassHPA: - as.HPA = hpa.NewHPAReconciler(client, scheme, componentMeta, componentExt) + return hpa.NewHPAReconciler(client, scheme, componentMeta, componentExt), nil + case constants.AutoscalerClassExternal: + return &NoOpAutoscaler{}, nil default: - return nil, errors.New("unknown autoscaler class type.") + return nil, fmt.Errorf("Unknown autoscaler class type: %v", ac) } - return as, nil } // Reconcile ... -func (r *AutoscalerReconciler) Reconcile() (*Autoscaler, error) { +func (r *AutoscalerReconciler) Reconcile() error { //reconcile Autoscaler - if r.Autoscaler.AutoscalerClass == constants.AutoscalerClassHPA { - _, err := r.Autoscaler.HPA.Reconcile() - if err != nil { - return nil, err - } - } - return r.Autoscaler, nil + r.Autoscaler.Reconcile() + return nil } diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler_test.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler_test.go new file mode 100644 index 0000000000..6ea35f2814 --- /dev/null +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler/autoscaler_reconciler_test.go @@ -0,0 +1,75 @@ +/* +Copyright 2021 The KServe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package autoscaler + +import ( + "github.com/google/go-cmp/cmp" + "github.com/kserve/kserve/pkg/constants" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "testing" +) + +func TestGetAutoscalerClass(t *testing.T) { + serviceName := "my-model" + namespace := "test" + + testCases := []struct { + name string + isvcMetaData *metav1.ObjectMeta + expectedAutoScalerType constants.AutoscalerClassType + }{ + { + name: "Return default AutoScaler,if the autoscalerClass annotation is not set", + isvcMetaData: &metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + Annotations: map[string]string{}, + }, + + expectedAutoScalerType: constants.AutoscalerClassHPA, + }, + { + name: "Return default AutoScaler,if the autoscalerClass annotation set hpa", + isvcMetaData: &metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "hpa"}, + }, + + expectedAutoScalerType: constants.AutoscalerClassHPA, + }, + { + name: "Return external AutoScaler,if the autoscalerClass annotation set external", + isvcMetaData: &metav1.ObjectMeta{ + Name: serviceName, + Namespace: namespace, + Annotations: map[string]string{"serving.kserve.io/autoscalerClass": "external"}, + }, + expectedAutoScalerType: constants.AutoscalerClassExternal, + }, + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := getAutoscalerClass(*tt.isvcMetaData) + if diff := cmp.Diff(tt.expectedAutoScalerType, result); diff != "" { + t.Errorf("Test %q unexpected result (-want +got): %v", t.Name(), diff) + } + }) + } +} diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/hpa/hpa_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/hpa/hpa_reconciler.go index 823eb5317d..97eca43d50 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/hpa/hpa_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/hpa/hpa_reconciler.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logf "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -56,7 +57,6 @@ func getHPAMetrics(metadata metav1.ObjectMeta, componentExt *v1beta1.ComponentEx var metrics []autoscalingv2.MetricSpec var utilization int32 annotations := metadata.Annotations - resourceName := corev1.ResourceCPU if value, ok := annotations[constants.TargetUtilizationPercentage]; ok { @@ -86,7 +86,6 @@ func getHPAMetrics(metadata metav1.ObjectMeta, componentExt *v1beta1.ComponentEx Target: metricTarget, }, } - metrics = append(metrics, ms) return metrics } @@ -115,9 +114,8 @@ func createHPA(componentMeta metav1.ObjectMeta, }, MinReplicas: &minReplicas, MaxReplicas: maxReplicas, - - Metrics: metrics, - Behavior: &autoscalingv2.HorizontalPodAutoscalerBehavior{}, + Metrics: metrics, + Behavior: &autoscalingv2.HorizontalPodAutoscalerBehavior{}, }, } return hpa @@ -178,3 +176,7 @@ func (r *HPAReconciler) Reconcile() (*autoscalingv2.HorizontalPodAutoscaler, err return existingHPA, nil } } + +func (r *HPAReconciler) SetControllerReferences(owner metav1.Object, scheme *runtime.Scheme) error { + return controllerutil.SetControllerReference(owner, r.HPA, scheme) +} diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go index 3bf56a4050..8ef219e4e2 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go @@ -97,7 +97,7 @@ func (r *RawKubeReconciler) Reconcile() (*appsv1.Deployment, error) { return nil, err } //reconcile HPA - _, err = r.Scaler.Reconcile() + err = r.Scaler.Reconcile() if err != nil { return nil, err }