diff --git a/clusters/resource_cluster.go b/clusters/resource_cluster.go
index 0948185d5c..1d31d931e5 100644
--- a/clusters/resource_cluster.go
+++ b/clusters/resource_cluster.go
@@ -2,15 +2,18 @@ package clusters
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"log"
 	"strings"
 	"time"
 
 	"github.com/hashicorp/go-cty/cty"
+	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
 
+	"github.com/databricks/databricks-sdk-go/apierr"
 	"github.com/databricks/databricks-sdk-go/service/compute"
 	"github.com/databricks/terraform-provider-databricks/common"
 	"github.com/databricks/terraform-provider-databricks/libraries"
@@ -604,7 +607,23 @@ func resourceClusterUpdate(ctx context.Context, d *schema.ResourceData, c *commo
 			return err
 		}
 		cluster.ForceSendFields = []string{"NumWorkers"}
-		_, err = clusters.Edit(ctx, cluster)
+
+		// Only Running and Terminated clusters can be modified. In particular, autoscaling clusters cannot be
+		// modified while a resize is in progress, and scaling can take several minutes, so an INVALID_STATE
+		// response is retried for up to 15 minutes. Any other error aborts the update immediately.
+		err = retry.RetryContext(ctx, 15*time.Minute, func() *retry.RetryError {
+			_, editErr := clusters.Edit(ctx, cluster)
+			if editErr == nil {
+				return nil
+			}
+			var apiErr *apierr.APIError
+			if errors.As(editErr, &apiErr) && apiErr.ErrorCode == "INVALID_STATE" {
+				// Wrap the API error so its message is preserved in the error that is eventually returned.
+				return retry.RetryableError(fmt.Errorf("cluster %s cannot be modified in its current state: %w", clusterId, editErr))
+			}
+			return retry.NonRetryableError(editErr)
+		})
+
 	}
 	if err != nil {
 		return err
diff --git a/clusters/resource_cluster_test.go b/clusters/resource_cluster_test.go
index 40f8a2db62..53c693810a 100644
--- a/clusters/resource_cluster_test.go
+++ b/clusters/resource_cluster_test.go
@@ -965,6 +965,104 @@ func TestResourceClusterUpdate(t *testing.T) {
 	assert.Equal(t, "abc", d.Id(), "Id should be the same as in reading")
 }
 
+// TestResourceClusterUpdate_WhileScaling checks that an update succeeds when the first
+// clusters/edit call is rejected with INVALID_STATE and the identical second call is accepted.
+func TestResourceClusterUpdate_WhileScaling(t *testing.T) {
+	d, err := qa.ResourceFixture{
+		Fixtures: []qa.HTTPFixture{
+			{
+				Method:       "GET",
+				Resource:     "/api/2.1/clusters/get?cluster_id=abc",
+				ReuseRequest: true,
+				Response: compute.ClusterDetails{
+					ClusterId:              "abc",
+					NumWorkers:             100,
+					ClusterName:            "Shared Autoscaling",
+					SparkVersion:           "7.1-scala12",
+					NodeTypeId:             "i3.xlarge",
+					AutoterminationMinutes: 15,
+					State:                  compute.StateRunning,
+				},
+			},
+			{
+				Method:   "POST",
+				Resource: "/api/2.1/clusters/events",
+				ExpectedRequest: compute.GetEvents{
+					ClusterId:  "abc",
+					Limit:      1,
+					Order:      compute.GetEventsOrderDesc,
+					EventTypes: []compute.EventType{compute.EventTypePinned, compute.EventTypeUnpinned},
+				},
+				Response: compute.GetEventsResponse{
+					Events:     []compute.ClusterEvent{},
+					TotalCount: 0,
+				},
+			},
+			{
+				Method:   "POST",
+				Resource: "/api/2.1/clusters/start",
+				ExpectedRequest: compute.StartCluster{
+					ClusterId: "abc",
+				},
+			},
+			{
+				Method:   "GET",
+				Resource: "/api/2.0/libraries/cluster-status?cluster_id=abc",
+				Response: compute.ClusterLibraryStatuses{
+					LibraryStatuses: []compute.LibraryFullStatus{},
+				},
+			},
+			{
+				Method:   "POST",
+				Resource: "/api/2.1/clusters/edit",
+				ExpectedRequest: compute.ClusterDetails{
+					AutoterminationMinutes: 15,
+					ClusterId:              "abc",
+					NumWorkers:             100,
+					ClusterName:            "Shared Autoscaling",
+					SparkVersion:           "7.1-scala12",
+					NodeTypeId:             "i3.xlarge",
+				},
+				Response: common.APIErrorBody{
+					ErrorCode: "INVALID_STATE",
+				},
+				Status: 404,
+			},
+			{
+				Method:   "POST",
+				Resource: "/api/2.1/clusters/edit",
+				ExpectedRequest: compute.ClusterDetails{
+					AutoterminationMinutes: 15,
+					ClusterId:              "abc",
+					NumWorkers:             100,
+					ClusterName:            "Shared Autoscaling",
+					SparkVersion:           "7.1-scala12",
+					NodeTypeId:             "i3.xlarge",
+				},
+			},
+			{
+				Method:   "GET",
+				Resource: "/api/2.0/libraries/cluster-status?cluster_id=abc",
+				Response: compute.ClusterLibraryStatuses{
+					LibraryStatuses: []compute.LibraryFullStatus{},
+				},
+			},
+		},
+		ID:       "abc",
+		Update:   true,
+		Resource: ResourceCluster(),
+		State: map[string]any{
+			"autotermination_minutes": 15,
+			"cluster_name":            "Shared Autoscaling",
+			"spark_version":           "7.1-scala12",
+			"node_type_id":            "i3.xlarge",
+			"num_workers":             100,
+		},
+	}.Apply(t)
+	assert.NoError(t, err)
+	assert.Equal(t, "abc", d.Id(), "Id should be the same as in reading")
+}
+
 func TestResourceClusterUpdateWithPinned(t *testing.T) {
 	d, err := qa.ResourceFixture{
 		Fixtures: []qa.HTTPFixture{