From 871af3ea7163a436291b465422251906c78b2448 Mon Sep 17 00:00:00 2001
From: Valdas Rakutis <36191877+UndeadRat22@users.noreply.github.com>
Date: Mon, 30 Sep 2024 13:18:50 +0300
Subject: [PATCH] feat: add woop downscaling behaviour customization (#387)
---
castai/resource_workload_scaling_policy.go | 57 ++++++++++++++++++-
.../resource_workload_scaling_policy_test.go | 4 ++
castai/sdk/api.gen.go | 35 +++++++++++-
docs/resources/workload_scaling_policy.md | 14 +++++
examples/aks/aks_cluster/providers.tf | 1 +
.../resource.tf | 3 +
6 files changed, 111 insertions(+), 3 deletions(-)
diff --git a/castai/resource_workload_scaling_policy.go b/castai/resource_workload_scaling_policy.go
index 4e1dfc6c..db1046ab 100644
--- a/castai/resource_workload_scaling_policy.go
+++ b/castai/resource_workload_scaling_policy.go
@@ -90,6 +90,23 @@ func resourceWorkloadScalingPolicy() *schema.Resource {
},
},
},
+ "downscaling": {
+ Type: schema.TypeList,
+ Optional: true,
+ MaxItems: 1,
+ Elem: &schema.Resource{
+ Schema: map[string]*schema.Schema{
+ "apply_type": {
+ Type: schema.TypeString,
+ Optional: true,
+ Description: `Defines the apply type to be used when downscaling.
+ - IMMEDIATE - pods are restarted immediately when new recommendation is generated.
+ - DEFERRED - pods are not restarted and recommendation values are applied during natural restarts only (new deployment, etc.)`,
+ ValidateDiagFunc: validation.ToDiagFunc(validation.StringInSlice([]string{"IMMEDIATE", "DEFERRED"}, false)),
+ },
+ },
+ },
+ },
},
Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(15 * time.Second),
@@ -167,6 +184,8 @@ func resourceWorkloadScalingPolicyCreate(ctx context.Context, d *schema.Resource
req.RecommendationPolicies.Startup = toStartup(toSection(d, "startup"))
+ req.RecommendationPolicies.Downscaling = toDownscaling(toSection(d, "downscaling"))
+
resp, err := client.WorkloadOptimizationAPICreateWorkloadScalingPolicyWithResponse(ctx, clusterID, req)
if checkErr := sdk.CheckOKResponse(resp, err); checkErr != nil {
return diag.FromErr(checkErr)
@@ -212,10 +231,12 @@ func resourceWorkloadScalingPolicyRead(ctx context.Context, d *schema.ResourceDa
if err := d.Set("memory", toWorkloadScalingPoliciesMap(sp.RecommendationPolicies.Memory)); err != nil {
return diag.FromErr(fmt.Errorf("setting memory: %w", err))
}
-
if err := d.Set("startup", toStartupMap(sp.RecommendationPolicies.Startup)); err != nil {
return diag.FromErr(fmt.Errorf("setting startup: %w", err))
}
+ if err := d.Set("downscaling", toDownscalingMap(sp.RecommendationPolicies.Downscaling)); err != nil {
+ return diag.FromErr(fmt.Errorf("setting downscaling: %w", err))
+ }
return nil
}
@@ -228,6 +249,7 @@ func resourceWorkloadScalingPolicyUpdate(ctx context.Context, d *schema.Resource
"cpu",
"memory",
"startup",
+ "downscaling",
) {
tflog.Info(ctx, "scaling policy up to date")
return nil
@@ -243,6 +265,7 @@ func resourceWorkloadScalingPolicyUpdate(ctx context.Context, d *schema.Resource
Cpu: toWorkloadScalingPolicies(d.Get("cpu").([]interface{})[0].(map[string]interface{})),
Memory: toWorkloadScalingPolicies(d.Get("memory").([]interface{})[0].(map[string]interface{})),
Startup: toStartup(toSection(d, "startup")),
+ Downscaling: toDownscaling(toSection(d, "downscaling")),
},
}
@@ -410,3 +433,35 @@ func toStartupMap(s *sdk.WorkloadoptimizationV1StartupSettings) []map[string]int
return []map[string]interface{}{m}
}
+
+// toDownscaling converts the "downscaling" schema section into SDK settings.
+// It returns nil for an empty section; apply_type is set only when non-empty.
+func toDownscaling(downscaling map[string]any) *sdk.WorkloadoptimizationV1DownscalingSettings {
+	if len(downscaling) == 0 {
+		return nil
+	}
+	settings := new(sdk.WorkloadoptimizationV1DownscalingSettings)
+	if applyType, _ := downscaling["apply_type"].(string); applyType != "" {
+		settings.ApplyType = lo.ToPtr(sdk.WorkloadoptimizationV1ApplyType(applyType))
+	}
+
+	return settings
+}
+
+// toDownscalingMap flattens SDK downscaling settings into the one-element list
+// form used for the "downscaling" attribute. It returns nil when s is nil or
+// carries no apply type.
+func toDownscalingMap(s *sdk.WorkloadoptimizationV1DownscalingSettings) []map[string]any {
+	if s == nil {
+		return nil
+	}
+
+	attrs := make(map[string]any, 1)
+	if applyType := s.ApplyType; applyType != nil {
+		attrs["apply_type"] = string(*applyType)
+	}
+	if len(attrs) == 0 {
+		return nil
+	}
+	return []map[string]any{attrs}
+}
diff --git a/castai/resource_workload_scaling_policy_test.go b/castai/resource_workload_scaling_policy_test.go
index 8b74ae61..a5fdbc33 100644
--- a/castai/resource_workload_scaling_policy_test.go
+++ b/castai/resource_workload_scaling_policy_test.go
@@ -68,6 +68,7 @@ func TestAccResourceWorkloadScalingPolicy(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "memory.0.apply_threshold", "0.2"),
resource.TestCheckResourceAttr(resourceName, "memory.0.args.0", "0.9"),
resource.TestCheckResourceAttr(resourceName, "startup.0.period_seconds", "123"),
+ resource.TestCheckResourceAttr(resourceName, "downscaling.0.apply_type", "DEFERRED"),
),
},
},
@@ -132,6 +133,9 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string {
startup {
period_seconds = 123
}
+ downscaling {
+ apply_type = "DEFERRED"
+ }
}`, updatedName)
return ConfigCompose(testAccGKEClusterConfig(name, clusterName, projectID), cfg)
diff --git a/castai/sdk/api.gen.go b/castai/sdk/api.gen.go
index 89d603f1..99f2cf1f 100644
--- a/castai/sdk/api.gen.go
+++ b/castai/sdk/api.gen.go
@@ -242,6 +242,15 @@ const (
Unknown PoliciesV1EvictorStatus = "Unknown"
)
+// Defines values for PoliciesV1PodPinnerStatus.
+const (
+ PodPinnerStatusCompatible PoliciesV1PodPinnerStatus = "PodPinnerStatus_Compatible"
+ PodPinnerStatusIncompatible PoliciesV1PodPinnerStatus = "PodPinnerStatus_Incompatible"
+ PodPinnerStatusIncompatibleVersion PoliciesV1PodPinnerStatus = "PodPinnerStatus_IncompatibleVersion"
+ PodPinnerStatusMissing PoliciesV1PodPinnerStatus = "PodPinnerStatus_Missing"
+ PodPinnerStatusUnknown PoliciesV1PodPinnerStatus = "PodPinnerStatus_Unknown"
+)
+
// Defines values for PoliciesV1SpotInterruptionPredictionsType.
const (
AWSRebalanceRecommendations PoliciesV1SpotInterruptionPredictionsType = "AWSRebalanceRecommendations"
@@ -1668,6 +1677,9 @@ type ExternalclusterV1ClusterUpdate struct {
// UpdateEKSClusterParams defines updatable EKS cluster configuration.
Eks *ExternalclusterV1UpdateEKSClusterParams `json:"eks,omitempty"`
+
+ // UpdateGKEClusterParams defines updatable GKE cluster configuration.
+ Gke *ExternalclusterV1UpdateGKEClusterParams `json:"gke,omitempty"`
}
// ExternalclusterV1CreateAssumeRolePrincipalResponse defines model for externalcluster.v1.CreateAssumeRolePrincipalResponse.
@@ -2115,6 +2127,15 @@ type ExternalclusterV1UpdateEKSClusterParams struct {
AssumeRoleArn *string `json:"assumeRoleArn,omitempty"`
}
+// UpdateGKEClusterParams defines updatable GKE cluster configuration.
+type ExternalclusterV1UpdateGKEClusterParams struct {
+ // service account email to impersonate.
+ GkeSaImpersonate *string `json:"gkeSaImpersonate,omitempty"`
+
+ // GCP target project where cluster runs.
+ ProjectId *string `json:"projectId,omitempty"`
+}
+
// Cluster zone.
type ExternalclusterV1Zone struct {
// ID of the zone.
@@ -2972,9 +2993,13 @@ type PoliciesV1NodeDownscalerEmptyNodes struct {
// Defines the CAST AI Pod Pinner component settings.
type PoliciesV1PodPinner struct {
// Enable/disable the Pod Pinner policy. This will either enable or disable the Pod Pinner component's automatic management in your cluster.
- Enabled *bool `json:"enabled"`
+ Enabled *bool `json:"enabled"`
+ Status *PoliciesV1PodPinnerStatus `json:"status,omitempty"`
}
+// PoliciesV1PodPinnerStatus defines model for policies.v1.PodPinnerStatus.
+type PoliciesV1PodPinnerStatus string
+
// Defines the autoscaling policies details.
type PoliciesV1Policies struct {
// Defines minimum and maximum amount of CPU the cluster can have.
@@ -3298,6 +3323,11 @@ type WorkloadoptimizationV1CpuMetrics struct {
// WorkloadoptimizationV1DeleteWorkloadScalingPolicyResponse defines model for workloadoptimization.v1.DeleteWorkloadScalingPolicyResponse.
type WorkloadoptimizationV1DeleteWorkloadScalingPolicyResponse = map[string]interface{}
+// WorkloadoptimizationV1DownscalingSettings defines model for workloadoptimization.v1.DownscalingSettings.
+type WorkloadoptimizationV1DownscalingSettings struct {
+ ApplyType *WorkloadoptimizationV1ApplyType `json:"applyType,omitempty"`
+}
+
// WorkloadoptimizationV1Event defines model for workloadoptimization.v1.Event.
type WorkloadoptimizationV1Event struct {
ConfigurationChanged *WorkloadoptimizationV1ConfigurationChangedEvent `json:"configurationChanged,omitempty"`
@@ -3507,7 +3537,8 @@ type WorkloadoptimizationV1RecommendationEventType string
// WorkloadoptimizationV1RecommendationPolicies defines model for workloadoptimization.v1.RecommendationPolicies.
type WorkloadoptimizationV1RecommendationPolicies struct {
- Cpu WorkloadoptimizationV1ResourcePolicies `json:"cpu"`
+ Cpu WorkloadoptimizationV1ResourcePolicies `json:"cpu"`
+ Downscaling *WorkloadoptimizationV1DownscalingSettings `json:"downscaling,omitempty"`
// Defines possible options for workload management.
// READ_ONLY - workload watched (metrics collected), but no actions may be performed by CAST AI.
diff --git a/docs/resources/workload_scaling_policy.md b/docs/resources/workload_scaling_policy.md
index d19a135e..17d7a725 100644
--- a/docs/resources/workload_scaling_policy.md
+++ b/docs/resources/workload_scaling_policy.md
@@ -35,6 +35,9 @@ resource "castai_workload_scaling_policy" "services" {
startup {
period_seconds = 240
}
+ downscaling {
+ apply_type = "DEFERRED"
+ }
}
```
@@ -56,6 +59,7 @@ resource "castai_workload_scaling_policy" "services" {
### Optional
+- `downscaling` (Block List, Max: 1) (see [below for nested schema](#nestedblock--downscaling))
- `startup` (Block List, Max: 1) (see [below for nested schema](#nestedblock--startup))
- `timeouts` (Block, Optional) (see [below for nested schema](#nestedblock--timeouts))
@@ -87,6 +91,16 @@ Optional:
- `overhead` (Number) Overhead for the recommendation, e.g. `0.1` will result in 10% higher recommendation
+
+### Nested Schema for `downscaling`
+
+Optional:
+
+- `apply_type` (String) Defines the apply type to be used when downscaling.
+ - IMMEDIATE - pods are restarted immediately when new recommendation is generated.
+ - DEFERRED - pods are not restarted and recommendation values are applied during natural restarts only (new deployment, etc.)
+
+
### Nested Schema for `startup`
diff --git a/examples/aks/aks_cluster/providers.tf b/examples/aks/aks_cluster/providers.tf
index bdab1922..b1a9e19e 100644
--- a/examples/aks/aks_cluster/providers.tf
+++ b/examples/aks/aks_cluster/providers.tf
@@ -4,5 +4,6 @@ provider "azurerm" {
}
provider "azuread" {
+ version = "2.53.1"
tenant_id = data.azurerm_subscription.current.tenant_id
}
diff --git a/examples/resources/castai_workload_scaling_policy/resource.tf b/examples/resources/castai_workload_scaling_policy/resource.tf
index fd6978b2..ebdd3ee0 100644
--- a/examples/resources/castai_workload_scaling_policy/resource.tf
+++ b/examples/resources/castai_workload_scaling_policy/resource.tf
@@ -18,4 +18,7 @@ resource "castai_workload_scaling_policy" "services" {
startup {
period_seconds = 240
}
+ downscaling {
+ apply_type = "DEFERRED"
+ }
}
\ No newline at end of file