diff --git a/castai/resource_rebalancing_schedule.go b/castai/resource_rebalancing_schedule.go index 8e5359de..bc7f9a0f 100644 --- a/castai/resource_rebalancing_schedule.go +++ b/castai/resource_rebalancing_schedule.go @@ -95,6 +95,11 @@ func resourceRebalancingSchedule() *schema.Resource { ValidateDiagFunc: validation.ToDiagFunc(validation.IntAtLeast(1)), Description: "Minimum number of nodes that should be kept in the cluster after rebalancing.", }, + "keep_drain_timeout_nodes": { + Type: schema.TypeBool, + Optional: true, + Description: "Defines whether the nodes that failed to get drained until a predefined timeout, will be kept with a rebalancing.cast.ai/status=drain-failed annotation instead of forcefully drained.", + }, "execution_conditions": { Type: schema.TypeList, MaxItems: 1, @@ -243,6 +248,8 @@ func stateToSchedule(d *schema.ResourceData) (*sdk.ScheduledrebalancingV1Rebalan return nil, fmt.Errorf("parsing selector: %w", err) } + keepDrainTimeoutNodes := readOptionalValue[bool](launchConfigurationData, "keep_drain_timeout_nodes") + var executionConditions *sdk.ScheduledrebalancingV1ExecutionConditions executionConditionsData := launchConfigurationData["execution_conditions"].([]any) if len(executionConditionsData) != 0 { @@ -256,8 +263,9 @@ func stateToSchedule(d *schema.ResourceData) (*sdk.ScheduledrebalancingV1Rebalan NodeTtlSeconds: readOptionalNumber[int, int32](launchConfigurationData, "node_ttl_seconds"), NumTargetedNodes: readOptionalNumber[int, int32](launchConfigurationData, "num_targeted_nodes"), RebalancingOptions: &sdk.ScheduledrebalancingV1RebalancingOptions{ - MinNodes: readOptionalNumber[int, int32](launchConfigurationData, "rebalancing_min_nodes"), - ExecutionConditions: executionConditions, + MinNodes: readOptionalNumber[int, int32](launchConfigurationData, "rebalancing_min_nodes"), + KeepDrainTimeoutNodes: keepDrainTimeoutNodes, + ExecutionConditions: executionConditions, }, Selector: selector, } @@ -286,6 +294,7 @@ func 
scheduleToState(schedule *sdk.ScheduledrebalancingV1RebalancingSchedule, d if schedule.LaunchConfiguration.RebalancingOptions != nil { launchConfig["rebalancing_min_nodes"] = schedule.LaunchConfiguration.RebalancingOptions.MinNodes + launchConfig["keep_drain_timeout_nodes"] = schedule.LaunchConfiguration.RebalancingOptions.KeepDrainTimeoutNodes executionConditions := schedule.LaunchConfiguration.RebalancingOptions.ExecutionConditions if executionConditions != nil { diff --git a/castai/resource_rebalancing_schedule_test.go b/castai/resource_rebalancing_schedule_test.go index 6d915364..ff022442 100644 --- a/castai/resource_rebalancing_schedule_test.go +++ b/castai/resource_rebalancing_schedule_test.go @@ -82,6 +82,7 @@ resource "castai_rebalancing_schedule" "test" { node_ttl_seconds = 10 num_targeted_nodes = 3 rebalancing_min_nodes = 2 + keep_drain_timeout_nodes = true selector = jsonencode({ nodeSelectorTerms = [{ matchExpressions = [ diff --git a/castai/sdk/api.gen.go b/castai/sdk/api.gen.go index f2d399f1..b2f177d3 100644 --- a/castai/sdk/api.gen.go +++ b/castai/sdk/api.gen.go @@ -1493,6 +1493,10 @@ type ScheduledrebalancingV1RebalancingOptions struct { // Defines the conditions which must be met in order to fully execute the plan. ExecutionConditions *ScheduledrebalancingV1ExecutionConditions `json:"executionConditions,omitempty"` + // Defines whether the nodes that failed to get drained until a predefined timeout, will be kept with a + // rebalancing.cast.ai/status=drain-failed annotation instead of forcefully drained. + KeepDrainTimeoutNodes *bool `json:"keepDrainTimeoutNodes,omitempty"` + + // Minimum number of nodes that should be kept in the cluster after rebalancing. 
MinNodes *int32 `json:"minNodes,omitempty"` } diff --git a/docs/resources/rebalancing_schedule.md b/docs/resources/rebalancing_schedule.md index 6e98f520..5e84584a 100644 --- a/docs/resources/rebalancing_schedule.md +++ b/docs/resources/rebalancing_schedule.md @@ -26,6 +26,7 @@ resource "castai_rebalancing_schedule" "spots" { node_ttl_seconds = 300 num_targeted_nodes = 3 rebalancing_min_nodes = 2 + keep_drain_timeout_nodes = true selector = jsonencode({ nodeSelectorTerms = [{ matchExpressions = [ @@ -68,6 +69,7 @@ resource "castai_rebalancing_schedule" "spots" { Optional: - `execution_conditions` (Block List, Max: 1) (see [below for nested schema](#nestedblock--launch_configuration--execution_conditions)) +- `keep_drain_timeout_nodes` (Boolean) Defines whether the nodes that failed to get drained until a predefined timeout, will be kept with a rebalancing.cast.ai/status=drain-failed annotation instead of forcefully drained. - `node_ttl_seconds` (Number) Specifies amount of time since node creation before the node is allowed to be considered for automated rebalancing. - `num_targeted_nodes` (Number) Maximum number of nodes that will be selected for rebalancing. - `rebalancing_min_nodes` (Number) Minimum number of nodes that should be kept in the cluster after rebalancing. 
diff --git a/examples/eks/eks_cluster_autoscaler_polices/castai.tf b/examples/eks/eks_cluster_autoscaler_polices/castai.tf index 8235e023..c0dfa756 100644 --- a/examples/eks/eks_cluster_autoscaler_polices/castai.tf +++ b/examples/eks/eks_cluster_autoscaler_polices/castai.tf @@ -200,6 +200,7 @@ resource "castai_rebalancing_schedule" "spots" { node_ttl_seconds = 300 num_targeted_nodes = 3 rebalancing_min_nodes = 2 + keep_drain_timeout_nodes = false selector = jsonencode({ nodeSelectorTerms = [{ matchExpressions = [ diff --git a/examples/resources/castai_rebalancing_schedule/resource.tf b/examples/resources/castai_rebalancing_schedule/resource.tf index a84337d0..d90ad38c 100644 --- a/examples/resources/castai_rebalancing_schedule/resource.tf +++ b/examples/resources/castai_rebalancing_schedule/resource.tf @@ -11,6 +11,7 @@ resource "castai_rebalancing_schedule" "spots" { node_ttl_seconds = 300 num_targeted_nodes = 3 rebalancing_min_nodes = 2 + keep_drain_timeout_nodes = false selector = jsonencode({ nodeSelectorTerms = [{ matchExpressions = [