From 871af3ea7163a436291b465422251906c78b2448 Mon Sep 17 00:00:00 2001 From: Valdas Rakutis <36191877+UndeadRat22@users.noreply.github.com> Date: Mon, 30 Sep 2024 13:18:50 +0300 Subject: [PATCH] feat: add woop downscaling behaviour customization (#387) --- castai/resource_workload_scaling_policy.go | 57 ++++++++++++++++++- .../resource_workload_scaling_policy_test.go | 4 ++ castai/sdk/api.gen.go | 35 +++++++++++- docs/resources/workload_scaling_policy.md | 14 +++++ examples/aks/aks_cluster/providers.tf | 1 + .../resource.tf | 3 + 6 files changed, 111 insertions(+), 3 deletions(-) diff --git a/castai/resource_workload_scaling_policy.go b/castai/resource_workload_scaling_policy.go index 4e1dfc6c..db1046ab 100644 --- a/castai/resource_workload_scaling_policy.go +++ b/castai/resource_workload_scaling_policy.go @@ -90,6 +90,23 @@ func resourceWorkloadScalingPolicy() *schema.Resource { }, }, }, + "downscaling": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "apply_type": { + Type: schema.TypeString, + Optional: true, + Description: `Defines the apply type to be used when downscaling. + - IMMEDIATE - pods are restarted immediately when new recommendation is generated. + - DEFERRED - pods are not restarted and recommendation values are applied during natural restarts only (new deployment, etc.)`, + ValidateDiagFunc: validation.ToDiagFunc(validation.StringInSlice([]string{"IMMEDIATE", "DEFERRED"}, false)), + }, + }, + }, + }, }, Timeouts: &schema.ResourceTimeout{ Create: schema.DefaultTimeout(15 * time.Second), @@ -167,6 +184,8 @@ func resourceWorkloadScalingPolicyCreate(ctx context.Context, d *schema.Resource req.RecommendationPolicies.Startup = toStartup(toSection(d, "startup")) + req.RecommendationPolicies.Downscaling = toDownscaling(toSection(d, "downscaling")) + resp, err := client.WorkloadOptimizationAPICreateWorkloadScalingPolicyWithResponse(ctx, clusterID, req) if checkErr := sdk.CheckOKResponse(resp, err); checkErr != nil { return diag.FromErr(checkErr) @@ -212,10 +231,12 @@ func resourceWorkloadScalingPolicyRead(ctx context.Context, d *schema.ResourceDa if err := d.Set("memory", toWorkloadScalingPoliciesMap(sp.RecommendationPolicies.Memory)); err != nil { return diag.FromErr(fmt.Errorf("setting memory: %w", err)) } - if err := d.Set("startup", toStartupMap(sp.RecommendationPolicies.Startup)); err != nil { return diag.FromErr(fmt.Errorf("setting startup: %w", err)) } + if err := d.Set("downscaling", toDownscalingMap(sp.RecommendationPolicies.Downscaling)); err != nil { + return diag.FromErr(fmt.Errorf("setting downscaling: %w", err)) + } return nil } @@ -228,6 +249,7 @@ func resourceWorkloadScalingPolicyUpdate(ctx context.Context, d *schema.Resource "cpu", "memory", "startup", + "downscaling", ) { tflog.Info(ctx, "scaling policy up to date") return nil @@ -243,6 +265,7 @@ func resourceWorkloadScalingPolicyUpdate(ctx context.Context, d *schema.Resource Cpu: toWorkloadScalingPolicies(d.Get("cpu").([]interface{})[0].(map[string]interface{})), Memory: toWorkloadScalingPolicies(d.Get("memory").([]interface{})[0].(map[string]interface{})), Startup: toStartup(toSection(d, "startup")), + Downscaling: toDownscaling(toSection(d, "downscaling")), }, } @@ -410,3 +433,35 @@ func toStartupMap(s *sdk.WorkloadoptimizationV1StartupSettings) []map[string]int return []map[string]interface{}{m} } + +func toDownscaling(downscaling map[string]any) *sdk.WorkloadoptimizationV1DownscalingSettings { + if len(downscaling) == 0 { + return nil + } + + result := &sdk.WorkloadoptimizationV1DownscalingSettings{} + + if v, ok := downscaling["apply_type"].(string); ok && v != "" { + result.ApplyType = lo.ToPtr(sdk.WorkloadoptimizationV1ApplyType(v)) + } + + return result +} + +func toDownscalingMap(s *sdk.WorkloadoptimizationV1DownscalingSettings) []map[string]any { + if s == nil { + return nil + } + + m := map[string]any{} + + if s.ApplyType != nil { + m["apply_type"] = string(*s.ApplyType) + } + + if len(m) == 0 { + return nil + } + + return []map[string]any{m} +} diff --git a/castai/resource_workload_scaling_policy_test.go b/castai/resource_workload_scaling_policy_test.go index 8b74ae61..a5fdbc33 100644 --- a/castai/resource_workload_scaling_policy_test.go +++ b/castai/resource_workload_scaling_policy_test.go @@ -68,6 +68,7 @@ func TestAccResourceWorkloadScalingPolicy(t *testing.T) { resource.TestCheckResourceAttr(resourceName, "memory.0.apply_threshold", "0.2"), resource.TestCheckResourceAttr(resourceName, "memory.0.args.0", "0.9"), resource.TestCheckResourceAttr(resourceName, "startup.0.period_seconds", "123"), + resource.TestCheckResourceAttr(resourceName, "downscaling.0.apply_type", "DEFERRED"), ), }, }, @@ -132,6 +133,9 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string { startup { period_seconds = 123 } + downscaling { + apply_type = "DEFERRED" + } }`, updatedName) return ConfigCompose(testAccGKEClusterConfig(name, clusterName, projectID), cfg) diff --git a/castai/sdk/api.gen.go b/castai/sdk/api.gen.go index 89d603f1..99f2cf1f 100644 --- a/castai/sdk/api.gen.go +++ b/castai/sdk/api.gen.go @@ -242,6 +242,15 @@ const ( Unknown PoliciesV1EvictorStatus = "Unknown" ) +// Defines values for PoliciesV1PodPinnerStatus. +const ( + PodPinnerStatusCompatible PoliciesV1PodPinnerStatus = "PodPinnerStatus_Compatible" + PodPinnerStatusIncompatible PoliciesV1PodPinnerStatus = "PodPinnerStatus_Incompatible" + PodPinnerStatusIncompatibleVersion PoliciesV1PodPinnerStatus = "PodPinnerStatus_IncompatibleVersion" + PodPinnerStatusMissing PoliciesV1PodPinnerStatus = "PodPinnerStatus_Missing" + PodPinnerStatusUnknown PoliciesV1PodPinnerStatus = "PodPinnerStatus_Unknown" +) + // Defines values for PoliciesV1SpotInterruptionPredictionsType. const ( AWSRebalanceRecommendations PoliciesV1SpotInterruptionPredictionsType = "AWSRebalanceRecommendations" @@ -1668,6 +1677,9 @@ type ExternalclusterV1ClusterUpdate struct { // UpdateEKSClusterParams defines updatable EKS cluster configuration. Eks *ExternalclusterV1UpdateEKSClusterParams `json:"eks,omitempty"` + + // UpdateGKEClusterParams defines updatable GKE cluster configuration. + Gke *ExternalclusterV1UpdateGKEClusterParams `json:"gke,omitempty"` } // ExternalclusterV1CreateAssumeRolePrincipalResponse defines model for externalcluster.v1.CreateAssumeRolePrincipalResponse. @@ -2115,6 +2127,15 @@ type ExternalclusterV1UpdateEKSClusterParams struct { AssumeRoleArn *string `json:"assumeRoleArn,omitempty"` } +// UpdateGKEClusterParams defines updatable GKE cluster configuration. +type ExternalclusterV1UpdateGKEClusterParams struct { + // service account email to impersonate. + GkeSaImpersonate *string `json:"gkeSaImpersonate,omitempty"` + + // GCP target project where cluster runs. + ProjectId *string `json:"projectId,omitempty"` +} + // Cluster zone. type ExternalclusterV1Zone struct { // ID of the zone. @@ -2972,9 +2993,13 @@ type PoliciesV1NodeDownscalerEmptyNodes struct { // Defines the CAST AI Pod Pinner component settings. type PoliciesV1PodPinner struct { // Enable/disable the Pod Pinner policy. This will either enable or disable the Pod Pinner component's automatic management in your cluster. - Enabled *bool `json:"enabled"` + Enabled *bool `json:"enabled"` + Status *PoliciesV1PodPinnerStatus `json:"status,omitempty"` } +// PoliciesV1PodPinnerStatus defines model for policies.v1.PodPinnerStatus. +type PoliciesV1PodPinnerStatus string + // Defines the autoscaling policies details. type PoliciesV1Policies struct { // Defines minimum and maximum amount of CPU the cluster can have. @@ -3298,6 +3323,11 @@ type WorkloadoptimizationV1CpuMetrics struct { // WorkloadoptimizationV1DeleteWorkloadScalingPolicyResponse defines model for workloadoptimization.v1.DeleteWorkloadScalingPolicyResponse. type WorkloadoptimizationV1DeleteWorkloadScalingPolicyResponse = map[string]interface{} +// WorkloadoptimizationV1DownscalingSettings defines model for workloadoptimization.v1.DownscalingSettings. +type WorkloadoptimizationV1DownscalingSettings struct { + ApplyType *WorkloadoptimizationV1ApplyType `json:"applyType,omitempty"` +} + // WorkloadoptimizationV1Event defines model for workloadoptimization.v1.Event. type WorkloadoptimizationV1Event struct { ConfigurationChanged *WorkloadoptimizationV1ConfigurationChangedEvent `json:"configurationChanged,omitempty"` @@ -3507,7 +3537,8 @@ type WorkloadoptimizationV1RecommendationEventType string // WorkloadoptimizationV1RecommendationPolicies defines model for workloadoptimization.v1.RecommendationPolicies. type WorkloadoptimizationV1RecommendationPolicies struct { - Cpu WorkloadoptimizationV1ResourcePolicies `json:"cpu"` + Cpu WorkloadoptimizationV1ResourcePolicies `json:"cpu"` + Downscaling *WorkloadoptimizationV1DownscalingSettings `json:"downscaling,omitempty"` // Defines possible options for workload management. // READ_ONLY - workload watched (metrics collected), but no actions may be performed by CAST AI. diff --git a/docs/resources/workload_scaling_policy.md b/docs/resources/workload_scaling_policy.md index d19a135e..17d7a725 100644 --- a/docs/resources/workload_scaling_policy.md +++ b/docs/resources/workload_scaling_policy.md @@ -35,6 +35,9 @@ resource "castai_workload_scaling_policy" "services" { startup { period_seconds = 240 } + downscaling { + apply_type = "DEFERRED" + } } ``` @@ -56,6 +59,7 @@ resource "castai_workload_scaling_policy" "services" { ### Optional +- `downscaling` (Block List, Max: 1) (see [below for nested schema](#nestedblock--downscaling)) - `startup` (Block List, Max: 1) (see [below for nested schema](#nestedblock--startup)) - `timeouts` (Block, Optional) (see [below for nested schema](#nestedblock--timeouts)) @@ -87,6 +91,16 @@ Optional: - `overhead` (Number) Overhead for the recommendation, e.g. `0.1` will result in 10% higher recommendation + +### Nested Schema for `downscaling` + +Optional: + +- `apply_type` (String) Defines the apply type to be used when downscaling. + - IMMEDIATE - pods are restarted immediately when new recommendation is generated. + - DEFERRED - pods are not restarted and recommendation values are applied during natural restarts only (new deployment, etc.) + + ### Nested Schema for `startup` diff --git a/examples/aks/aks_cluster/providers.tf b/examples/aks/aks_cluster/providers.tf index bdab1922..b1a9e19e 100644 --- a/examples/aks/aks_cluster/providers.tf +++ b/examples/aks/aks_cluster/providers.tf @@ -4,5 +4,6 @@ provider "azurerm" { } provider "azuread" { + version = "2.53.1" tenant_id = data.azurerm_subscription.current.tenant_id } diff --git a/examples/resources/castai_workload_scaling_policy/resource.tf b/examples/resources/castai_workload_scaling_policy/resource.tf index fd6978b2..ebdd3ee0 100644 --- a/examples/resources/castai_workload_scaling_policy/resource.tf +++ b/examples/resources/castai_workload_scaling_policy/resource.tf @@ -18,4 +18,7 @@ resource "castai_workload_scaling_policy" "services" { startup { period_seconds = 240 } + downscaling { + apply_type = "DEFERRED" + } } \ No newline at end of file