From af366dc36cbd5f97eb7b6b597cb6142ff9918053 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Thu, 8 Aug 2024 11:47:31 +0200 Subject: [PATCH 01/10] [Exporter] Add support for Vector Search assets (#3828) ## Changes This PR adds support for exporting of `databricks_vector_search_endpoint` and `databricks_vector_search_index` resources. ## Tests - [x] `make test` run locally - [x] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [ ] using Go SDK --- docs/guides/experimental-exporter.md | 3 + exporter/exporter_test.go | 10 ++ exporter/importables.go | 96 ++++++++++++++++++- exporter/importables_test.go | 137 +++++++++++++++++++++++++++ 4 files changed, 244 insertions(+), 2 deletions(-) diff --git a/docs/guides/experimental-exporter.md b/docs/guides/experimental-exporter.md index e9cbad7b54..85ea6c955e 100644 --- a/docs/guides/experimental-exporter.md +++ b/docs/guides/experimental-exporter.md @@ -145,6 +145,7 @@ Services are just logical groups of resources used for filtering and organizatio * `uc-tables` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_sql_table](../resources/sql_table.md) resource. * `uc-volumes` - **listing** (*we can't list directly, only via dependencies to top-level object*) [databricks_volume](../resources/volume.md) * `users` - [databricks_user](../resources/user.md) and [databricks_service_principal](../resources/service_principal.md) are written to their own file, simply because of their amount. If you use SCIM provisioning, migrating workspaces is the only use case for importing `users` service. +* `vector-search` - **listing** exports [databricks_vector_search_endpoint](../resources/vector_search_endpoint.md) and [databricks_vector_search_index](../resources/vector_search_index.md) * `workspace` - **listing** [databricks_workspace_conf](../resources/workspace_conf.md) and [databricks_global_init_script](../resources/global_init_script.md) ## Secrets @@ -225,6 +226,8 @@ Exporter aims to generate HCL code for most of the resources within the Databric | [databricks_user](../resources/user.md) | Yes | No | Yes | Yes | | [databricks_user_instance_profile](../resources/user_instance_profile.md) | No | No | No | No | | [databricks_user_role](../resources/user_role.md) | Yes | No | Yes | Yes | +| [databricks_vector_search_endpoint](../resources/vector_search_endpoint.md) | Yes | No | Yes | No | +| [databricks_vector_search_index](../resources/vector_search_index.md) | Yes | No | Yes | No | | [databricks_volume](../resources/volume.md) | Yes | Yes | Yes | No | | [databricks_workspace_binding](../resources/workspace_binding.md) | Yes | No | Yes | No | | [databricks_workspace_conf](../resources/workspace_conf.md) | Yes (partial) | No | Yes | No | diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go index 249c65fa50..ecbd0b34ab 100644 --- a/exporter/exporter_test.go +++ b/exporter/exporter_test.go @@ -24,6 +24,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/settings" "github.com/databricks/databricks-sdk-go/service/sharing" "github.com/databricks/databricks-sdk-go/service/sql" + sdk_vs "github.com/databricks/databricks-sdk-go/service/vectorsearch" sdk_workspace "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/databricks/terraform-provider-databricks/aws" "github.com/databricks/terraform-provider-databricks/clusters" @@ -305,6 +306,13 @@ var emptyRepos = qa.HTTPFixture{ Response: 
repos.ReposListResponse{}, } +var emptyVectorSearch = qa.HTTPFixture{ + Method: "GET", + ReuseRequest: true, + Resource: "/api/2.0/vector-search/endpoints?", + Response: sdk_vs.ListEndpointResponse{}, +} + var emptyShares = qa.HTTPFixture{ Method: "GET", ReuseRequest: true, @@ -484,6 +492,7 @@ func TestImportingUsersGroupsSecretScopes(t *testing.T) { emptySqlEndpoints, emptySqlQueries, emptySqlAlerts, + emptyVectorSearch, emptyPipelines, emptyClusterPolicies, emptyPolicyFamilies, @@ -757,6 +766,7 @@ func TestImportingNoResourcesError(t *testing.T) { emptyIpAccessLIst, emptyWorkspace, emptySqlEndpoints, + emptyVectorSearch, emptySqlQueries, emptySqlDashboards, emptySqlAlerts, diff --git a/exporter/importables.go b/exporter/importables.go index f6f0f217f7..e539c32192 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -25,6 +25,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/settings" "github.com/databricks/databricks-sdk-go/service/sharing" "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/databricks/databricks-sdk-go/service/vectorsearch" sdk_workspace "github.com/databricks/databricks-sdk-go/service/workspace" tfcatalog "github.com/databricks/terraform-provider-databricks/catalog" "github.com/databricks/terraform-provider-databricks/clusters" @@ -2615,6 +2616,10 @@ var resourcesMap map[string]importable = map[string]importable{ // TODO: it's better to use SecurableKind if it will be added to the Go SDK switch table.DataSourceFormat { case "VECTOR_INDEX_FORMAT": + ic.Emit(&resource{ + Resource: "databricks_vector_search_index", + ID: table.FullName, + }) case "MYSQL_FORMAT": ic.EmitIfUpdatedAfterMillis(&resource{ Resource: "databricks_online_table", @@ -3311,8 +3316,7 @@ var resourcesMap map[string]importable = map[string]importable{ WorkspaceLevel: true, Service: "uc-online-tables", Import: func(ic *importContext, r *resource) error { - tableFullName := r.ID - ic.emitUCGrantsWithOwner("table/"+tableFullName, r) + ic.emitUCGrantsWithOwner("table/"+r.ID, r) ic.Emit(&resource{ Resource: "databricks_sql_table", ID: r.Data.Get("spec.0.source_table_full_name").(string), @@ -3329,4 +3333,92 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "spec.source_table_full_name", Resource: "databricks_sql_table"}, }, }, + "databricks_vector_search_endpoint": { + WorkspaceLevel: true, + Service: "vector-search", + List: func(ic *importContext) error { + endpoints, err := ic.workspaceClient.VectorSearchEndpoints.ListEndpointsAll(ic.Context, vectorsearch.ListEndpointsRequest{}) + if err != nil { + log.Printf("[ERROR] listing vector search endpoints: %s", err.Error()) + return err + } + for _, ep := range endpoints { + ic.EmitIfUpdatedAfterMillis(&resource{ + Resource: "databricks_vector_search_endpoint", + ID: ep.Name, + }, ep.LastUpdatedTimestamp, fmt.Sprintf("vector search endpoint '%s'", ep.Name)) + + } + return nil + }, + Import: func(ic *importContext, r *resource) error { + indexes, err := ic.workspaceClient.VectorSearchIndexes.ListIndexesAll(ic.Context, vectorsearch.ListIndexesRequest{ + EndpointName: r.ID, + }) + if err != nil { + log.Printf("[ERROR] listing vector search indexes for endpoint %s: %s", r.ID, err.Error()) + return err + } + for _, idx := range indexes { + ic.Emit(&resource{ + Resource: "databricks_vector_search_index", + ID: idx.Name, + }) + } + return nil + }, + }, + "databricks_vector_search_index": { + WorkspaceLevel: true, + Service: "vector-search", + Import: func(ic *importContext, r *resource) error { + 
ic.emitUCGrantsWithOwner("table/"+r.ID, r) + s := ic.Resources["databricks_vector_search_index"].Schema + var vsi vectorsearch.VectorIndex + common.DataToStructPointer(r.Data, s, &vsi) + if vsi.EndpointName != "" { + ic.Emit(&resource{ + Resource: "databricks_vector_search_endpoint", + ID: vsi.EndpointName, + }) + } + if vsi.DeltaSyncIndexSpec != nil { + ic.Emit(&resource{ + Resource: "databricks_sql_table", + ID: vsi.DeltaSyncIndexSpec.SourceTable, + }) + if vsi.DeltaSyncIndexSpec.EmbeddingWritebackTable != "" { + ic.Emit(&resource{ + Resource: "databricks_sql_table", + ID: vsi.DeltaSyncIndexSpec.EmbeddingWritebackTable, + }) + } + for _, col := range vsi.DeltaSyncIndexSpec.EmbeddingSourceColumns { + if col.EmbeddingModelEndpointName != "" { + ic.Emit(&resource{ + Resource: "databricks_model_serving", + ID: col.EmbeddingModelEndpointName, + }) + } + } + } + if vsi.DirectAccessIndexSpec != nil { + for _, col := range vsi.DirectAccessIndexSpec.EmbeddingSourceColumns { + if col.EmbeddingModelEndpointName != "" { + ic.Emit(&resource{ + Resource: "databricks_model_serving", + ID: col.EmbeddingModelEndpointName, + }) + } + } + } + return nil + }, + Depends: []reference{ + {Path: "delta_sync_index_spec.source_table", Resource: "databricks_sql_table"}, + {Path: "endpoint_name", Resource: "databricks_vector_search_endpoint"}, + {Path: "delta_sync_index_spec.embedding_source_columns.embedding_model_endpoint_name", Resource: "databricks_model_serving"}, + {Path: "direct_access_index_spec.embedding_source_columns.embedding_model_endpoint_name", Resource: "databricks_model_serving"}, + }, + }, } diff --git a/exporter/importables_test.go b/exporter/importables_test.go index cd4e27846f..1904ab0d1d 100644 --- a/exporter/importables_test.go +++ b/exporter/importables_test.go @@ -17,6 +17,7 @@ import ( sdk_jobs "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" "github.com/databricks/databricks-sdk-go/service/sharing" + sdk_vs "github.com/databricks/databricks-sdk-go/service/vectorsearch" sdk_workspace "github.com/databricks/databricks-sdk-go/service/workspace" tfcatalog "github.com/databricks/terraform-provider-databricks/catalog" "github.com/databricks/terraform-provider-databricks/clusters" @@ -35,6 +36,7 @@ import ( "github.com/databricks/terraform-provider-databricks/secrets" tfsharing "github.com/databricks/terraform-provider-databricks/sharing" "github.com/databricks/terraform-provider-databricks/storage" + tf_vs "github.com/databricks/terraform-provider-databricks/vectorsearch" "github.com/databricks/terraform-provider-databricks/workspace" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/stretchr/testify/assert" @@ -2274,3 +2276,138 @@ func TestImportUcOnlineTable(t *testing.T) { assert.True(t, ic.testEmits["databricks_grants[] (id: table/main.tmp.tbl_ot)"]) }) } + +func TestImportVectorSearchEndpointList(t *testing.T) { + qa.HTTPFixturesApply(t, []qa.HTTPFixture{ + { + Method: "GET", + Resource: "/api/2.0/vector-search/endpoints?", + Response: sdk_vs.ListEndpointResponse{ + Endpoints: []sdk_vs.EndpointInfo{ + { + Name: "test", + LastUpdatedTimestamp: 1234567890, + }, + { + Name: "test2", + LastUpdatedTimestamp: 2234567890, + }, + }, + }, + }, + }, func(ctx context.Context, client *common.DatabricksClient) { + ic := importContextForTestWithClient(ctx, client) + tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName()) + defer os.RemoveAll(tmpDir) + os.Mkdir(tmpDir, 0700) + ic.Directory = tmpDir + ic.enableServices("vector-search") + 
ic.currentMetastore = currentMetastoreResponse + + err := resourcesMap["databricks_vector_search_endpoint"].List(ic) + assert.NoError(t, err) + require.Equal(t, 2, len(ic.testEmits)) + assert.True(t, ic.testEmits["databricks_vector_search_endpoint[] (id: test)"]) + assert.True(t, ic.testEmits["databricks_vector_search_endpoint[] (id: test2)"]) + }) +} + +func TestImportVectorSearchEndpoint(t *testing.T) { + vseName := "test" + qa.HTTPFixturesApply(t, []qa.HTTPFixture{ + { + Method: "GET", + Resource: "/api/2.0/vector-search/indexes?endpoint_name=test", + Response: sdk_vs.ListVectorIndexesResponse{ + VectorIndexes: []sdk_vs.MiniVectorIndex{ + { + Name: "idx1", + }, + { + Name: "idx2", + }, + }, + }, + }, + }, func(ctx context.Context, client *common.DatabricksClient) { + ic := importContextForTestWithClient(ctx, client) + tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName()) + defer os.RemoveAll(tmpDir) + os.Mkdir(tmpDir, 0700) + ic.Directory = tmpDir + ic.enableServices("vector-search") + ic.currentMetastore = currentMetastoreResponse + + d := tf_vs.ResourceVectorSearchEndpoint().ToResource().TestResourceData() + vse := sdk_vs.EndpointInfo{ + Name: vseName, + } + d.SetId(vseName) + d.MarkNewResource() + scm := tf_vs.ResourceVectorSearchEndpoint().Schema + err := common.StructToData(vse, scm, d) + require.NoError(t, err) + + err = resourcesMap["databricks_vector_search_endpoint"].Import(ic, &resource{ + ID: vseName, + Data: d, + }) + assert.NoError(t, err) + require.Equal(t, 2, len(ic.testEmits)) + assert.True(t, ic.testEmits["databricks_vector_search_index[] (id: idx1)"]) + assert.True(t, ic.testEmits["databricks_vector_search_index[] (id: idx2)"]) + }) +} + +func TestImportVectorSearchIndex(t *testing.T) { + qa.HTTPFixturesApply(t, []qa.HTTPFixture{}, func(ctx context.Context, client *common.DatabricksClient) { + ic := importContextForTestWithClient(ctx, client) + tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName()) + defer os.RemoveAll(tmpDir) + os.Mkdir(tmpDir, 0700) + ic.Directory = tmpDir + ic.enableServices("vector-search,uc-tables,uc-grants,model-serving") + ic.currentMetastore = currentMetastoreResponse + + vsiName := "main.tmp.vsi" + d := tf_vs.ResourceVectorSearchIndex().ToResource().TestResourceData() + ot := sdk_vs.VectorIndex{ + Name: vsiName, + PrimaryKey: "id", + EndpointName: "vs", + DeltaSyncIndexSpec: &sdk_vs.DeltaSyncVectorIndexSpecResponse{ + SourceTable: "main.tmp.tbl", + EmbeddingSourceColumns: []sdk_vs.EmbeddingSourceColumn{ + { + Name: "col1", + EmbeddingModelEndpointName: "test", + }, + }, + }, + DirectAccessIndexSpec: &sdk_vs.DirectAccessVectorIndexSpec{ + EmbeddingSourceColumns: []sdk_vs.EmbeddingSourceColumn{ + { + Name: "col1", + EmbeddingModelEndpointName: "test", + }, + }, + }, + } + d.SetId(vsiName) + d.MarkNewResource() + scm := tf_vs.ResourceVectorSearchIndex().Schema + err := common.StructToData(ot, scm, d) + require.NoError(t, err) + + err = resourcesMap["databricks_vector_search_index"].Import(ic, &resource{ + ID: vsiName, + Data: d, + }) + assert.NoError(t, err) + require.Equal(t, 4, len(ic.testEmits)) + assert.True(t, ic.testEmits["databricks_grants[] (id: table/main.tmp.vsi)"]) + assert.True(t, ic.testEmits["databricks_vector_search_endpoint[] (id: vs)"]) + assert.True(t, ic.testEmits["databricks_sql_table[] (id: main.tmp.tbl)"]) + assert.True(t, ic.testEmits["databricks_model_serving[] (id: test)"]) + }) +} From fed73077debc212e282e25acb2e62250c85806c2 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Thu, 8 Aug 2024 11:54:02 +0200 Subject: [PATCH 02/10] 
[Exporter] Add support for `databricks_notification_destination` (#3861) ## Changes Now we can export `databricks_notification_destination` resources. Extended `databricks_job` to emit email notifications on the task level, and webhooks on job and task levels ## Tests - [x] `make test` run locally - [x] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [x] using Go SDK --- docs/guides/experimental-exporter.md | 9 +- docs/resources/notification_destination.md | 1 + exporter/exporter_test.go | 183 +++++++++++++++++++++ exporter/importables.go | 128 ++++++++++++++ exporter/util.go | 10 ++ 5 files changed, 328 insertions(+), 3 deletions(-) diff --git a/docs/guides/experimental-exporter.md b/docs/guides/experimental-exporter.md index 85ea6c955e..713c6579f7 100644 --- a/docs/guides/experimental-exporter.md +++ b/docs/guides/experimental-exporter.md @@ -125,6 +125,7 @@ Services are just logical groups of resources used for filtering and organizatio * `pools` - **listing** [instance pools](../resources/instance_pool.md). * `repos` - **listing** [databricks_repo](../resources/repo.md) * `secrets` - **listing** [databricks_secret_scope](../resources/secret_scope.md) along with [keys](../resources/secret.md) and [ACLs](../resources/secret_acl.md). +* `settings` - **listing** [databricks_notification_destination](../resources/notification_destination.md). * `sql-alerts` - **listing** [databricks_sql_alert](../resources/sql_alert.md). * `sql-dashboards` - **listing** [databricks_sql_dashboard](../resources/sql_dashboard.md) along with associated [databricks_sql_widget](../resources/sql_widget.md) and [databricks_sql_visualization](../resources/sql_visualization.md). * `sql-endpoints` - **listing** [databricks_sql_endpoint](../resources/sql_endpoint.md) along with [databricks_sql_global_config](../resources/sql_global_config.md). 
@@ -178,7 +179,7 @@ Exporter aims to generate HCL code for most of the resources within the Databric | [databricks_dbfs_file](../resources/dbfs_file.md) | Yes | No | Yes | No | | [databricks_external_location](../resources/external_location.md) | Yes | Yes | Yes | No | | [databricks_file](../resources/file.md) | Yes | No | Yes | No | -| [databricks_global_init_script](../resources/global_init_script.md) | Yes | Yes | Yes | No | +| [databricks_global_init_script](../resources/global_init_script.md) | Yes | Yes | Yes\*\* | No | | [databricks_grants](../resources/grants.md) | Yes | No | Yes | No | | [databricks_group](../resources/group.md) | Yes | No | Yes | Yes | | [databricks_group_instance_profile](../resources/group_instance_profile.md) | Yes | No | Yes | No | @@ -186,7 +187,7 @@ Exporter aims to generate HCL code for most of the resources within the Databric | [databricks_group_role](../resources/group_role.md) | Yes | No | Yes | Yes | | [databricks_instance_pool](../resources/instance_pool.md) | Yes | No | Yes | No | | [databricks_instance_profile](../resources/instance_profile.md) | Yes | No | Yes | No | -| [databricks_ip_access_list](../resources/ip_access_list.md) | Yes | Yes | Yes | No | +| [databricks_ip_access_list](../resources/ip_access_list.md) | Yes | Yes | Yes\*\* | No | | [databricks_job](../resources/job.md) | Yes | No | Yes | No | | [databricks_library](../resources/library.md) | Yes\* | No | Yes | No | | [databricks_metastore](../resources/metastore.md) | Yes | Yes | No | Yes | @@ -197,6 +198,7 @@ Exporter aims to generate HCL code for most of the resources within the Databric | [databricks_model_serving](../resources/model_serving) | Yes | Yes | Yes | No | | [databricks_mws_permission_assignment](../resources/mws_permission_assignment.md) | Yes | No | No | Yes | | [databricks_notebook](../resources/notebook.md) | Yes | Yes | Yes | No | +| [databricks_notification_destination](../resources/notification_destination.md) | Yes | No | Yes\*\* | No | | [databricks_obo_token](../resources/obo_token.md) | Not Applicable | No | No | No | | [databricks_online_table](../resources/online_table.md) | Yes | Yes | Yes | No | | [databricks_permissions](../resources/permissions.md) | Yes | No | Yes | No | @@ -230,9 +232,10 @@ Exporter aims to generate HCL code for most of the resources within the Databric | [databricks_vector_search_index](../resources/vector_search_index.md) | Yes | No | Yes | No | | [databricks_volume](../resources/volume.md) | Yes | Yes | Yes | No | | [databricks_workspace_binding](../resources/workspace_binding.md) | Yes | No | Yes | No | -| [databricks_workspace_conf](../resources/workspace_conf.md) | Yes (partial) | No | Yes | No | +| [databricks_workspace_conf](../resources/workspace_conf.md) | Yes (partial) | No | Yes\*\* | No | | [databricks_workspace_file](../resources/workspace_file.md) | Yes | Yes | Yes | No | Notes: * \* - libraries are exported as blocks inside the cluster definition instead of generating `databricks_library` resources. This is done to decrease the number of generated resources. +* \*\* - requires workspace admin permission. diff --git a/docs/resources/notification_destination.md b/docs/resources/notification_destination.md index 21c1170402..7a5273d039 100644 --- a/docs/resources/notification_destination.md +++ b/docs/resources/notification_destination.md @@ -97,3 +97,4 @@ The following arguments are supported: In addition to all arguments above, the following attributes are exported: * `id` - The unique ID of the Notification Destination. 
+* `destination_type` - the type of Notification Destination. diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go index ecbd0b34ab..cfdaea301e 100644 --- a/exporter/exporter_test.go +++ b/exporter/exporter_test.go @@ -452,6 +452,12 @@ var emptyLakeviewList = qa.HTTPFixture{ ReuseRequest: true, } +var emptyDestinationNotficationsList = qa.HTTPFixture{ + Method: "GET", + Resource: "/api/2.0/notification-destinations?", + Response: settings.ListNotificationDestinationsResponse{}, +} + func TestImportingUsersGroupsSecretScopes(t *testing.T) { listSpFixtures := qa.ListServicePrincipalsFixtures([]iam.ServicePrincipal{ { @@ -472,6 +478,7 @@ func TestImportingUsersGroupsSecretScopes(t *testing.T) { }) qa.HTTPFixturesApply(t, []qa.HTTPFixture{ + emptyDestinationNotficationsList, noCurrentMetastoreAttached, emptyLakeviewList, emptyMetastoreList, @@ -748,6 +755,7 @@ func TestImportingNoResourcesError(t *testing.T) { }, noCurrentMetastoreAttached, emptyLakeviewList, + emptyDestinationNotficationsList, emptyMetastoreList, emptyRepos, emptyExternalLocations, @@ -1106,6 +1114,11 @@ func TestImportingJobs_JobList(t *testing.T) { EmailNotifications: &sdk_jobs.JobEmailNotifications{ OnFailure: []string{"user@domain.com"}, }, + WebhookNotifications: &sdk_jobs.WebhookNotifications{ + OnSuccess: []sdk_jobs.Webhook{ + {Id: "123"}, + }, + }, Libraries: []compute.Library{ {Jar: "dbfs:/FileStore/jars/test.jar"}, {Whl: "/Workspace/Repos/user@domain.com/repo/test.whl"}, @@ -1356,6 +1369,14 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) { RunJobTask: &jobs.RunJobTask{ JobID: 14, }, + WebhookNotifications: &sdk_jobs.WebhookNotifications{ + OnSuccess: []sdk_jobs.Webhook{ + {Id: "123"}, + }, + }, + EmailNotifications: &sdk_jobs.TaskEmailNotifications{ + OnFailure: []string{"user@domain.com"}, + }, }, { TaskKey: "dummy2", @@ -1376,6 +1397,14 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) { }, Name: "Dummy", Format: "MULTI_TASK", + WebhookNotifications: &sdk_jobs.WebhookNotifications{ + OnSuccess: []sdk_jobs.Webhook{ + {Id: "123"}, + }, + }, + EmailNotifications: &sdk_jobs.JobEmailNotifications{ + OnFailure: []string{"user@domain.com"}, + }, JobClusters: []jobs.JobCluster{ { JobClusterKey: "shared", @@ -2745,3 +2774,157 @@ func TestImportingLakeviewDashboards(t *testing.T) { assert.Equal(t, `{}`, contentStr) }) } + +func TestNotificationDestinationExport(t *testing.T) { + qa.HTTPFixturesApply(t, []qa.HTTPFixture{ + meAdminFixture, + noCurrentMetastoreAttached, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations?", + Response: settings.ListNotificationDestinationsResponse{ + Results: []settings.ListNotificationDestinationsResult{ + { + DisplayName: "email", + Id: "123", + DestinationType: "EMAIL", + }, + { + DisplayName: "slack", + Id: "234", + DestinationType: "SLACK", + }, + { + DisplayName: "teams", + Id: "345", + DestinationType: "MICROSOFT_TEAMS", + }, + { + DisplayName: "pagerdruty", + Id: "456", + DestinationType: "PAGERDUTY", + }, + { + DisplayName: "webhook", + Id: "8481e00d-3e55-4c6c-8462-33b60d1cdc94", + DestinationType: "WEBHOOK", + }, + }, + }, + }, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations/123?", + Response: settings.NotificationDestination{ + DisplayName: "email", + Id: "123", + DestinationType: "EMAIL", + Config: &settings.Config{ + Email: &settings.EmailConfig{ + Addresses: []string{"user@domain.com"}, + }, + }, + }, + }, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations/234?", + Response: 
settings.NotificationDestination{ + DisplayName: "slack", + Id: "234", + DestinationType: "SLACK", + Config: &settings.Config{ + Slack: &settings.SlackConfig{ + UrlSet: true, + }, + }, + }, + }, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations/345?", + Response: settings.NotificationDestination{ + DisplayName: "teams", + Id: "345", + DestinationType: "MICROSOFT_TEAMS", + Config: &settings.Config{ + MicrosoftTeams: &settings.MicrosoftTeamsConfig{ + UrlSet: true, + }, + }, + }, + }, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations/456?", + Response: settings.NotificationDestination{ + DisplayName: "pagerdruty", + Id: "456", + DestinationType: "PAGERDUTY", + Config: &settings.Config{ + Pagerduty: &settings.PagerdutyConfig{ + IntegrationKeySet: true, + }, + }, + }, + }, + { + Method: "GET", + Resource: "/api/2.0/notification-destinations/8481e00d-3e55-4c6c-8462-33b60d1cdc94?", + Response: settings.NotificationDestination{ + DisplayName: "webhook", + Id: "567", + DestinationType: "WEBHOOK", + Config: &settings.Config{ + GenericWebhook: &settings.GenericWebhookConfig{ + UrlSet: true, + PasswordSet: true, + }, + }, + }, + }, + }, func(ctx context.Context, client *common.DatabricksClient) { + tmpDir := fmt.Sprintf("/tmp/tf-%s", qa.RandomName()) + defer os.RemoveAll(tmpDir) + + ic := newImportContext(client) + ic.Directory = tmpDir + ic.enableListing("settings") + ic.enableServices("settings") + + err := ic.Run() + assert.NoError(t, err) + + content, err := os.ReadFile(tmpDir + "/settings.tf") + assert.NoError(t, err) + contentStr := string(content) + log.Printf("[DEBUG] contentStr: %s", contentStr) + assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "pagerdruty_456"`)) + assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "teams_345"`)) + assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "email_123" { + display_name = "email" + config { + email { + addresses = ["user@domain.com"] + } + } +}`)) + assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "webhook_8481e00d" { + display_name = "webhook" + config { + generic_webhook { + url = var.config_webhook_8481e00d + password = var.config_webhook_8481e00d_1 + } + } +}`)) + assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "slack_234" { + display_name = "slack" + config { + slack { + url = var.config_slack_234 + } + } +}`)) + }) +} diff --git a/exporter/importables.go b/exporter/importables.go index e539c32192..98b4f8c93e 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -34,6 +34,7 @@ import ( "github.com/databricks/terraform-provider-databricks/mws" "github.com/databricks/terraform-provider-databricks/permissions" "github.com/databricks/terraform-provider-databricks/repos" + tfsettings "github.com/databricks/terraform-provider-databricks/settings" tfsharing "github.com/databricks/terraform-provider-databricks/sharing" tfsql "github.com/databricks/terraform-provider-databricks/sql" sql_api "github.com/databricks/terraform-provider-databricks/sql/api" @@ -434,12 +435,31 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "task.sql_task.dashboard.dashboard_id", Resource: "databricks_sql_dashboard"}, {Path: "task.sql_task.query.query_id", Resource: "databricks_sql_query"}, {Path: "task.sql_task.warehouse_id", Resource: "databricks_sql_endpoint"}, + {Path: 
"task.webhook_notifications.on_duration_warning_threshold_exceeded.id", Resource: "databricks_notification_destination"}, + {Path: "task.webhook_notifications.on_failure.id", Resource: "databricks_notification_destination"}, + {Path: "task.webhook_notifications.on_start.id", Resource: "databricks_notification_destination"}, + {Path: "task.webhook_notifications.on_success.id", Resource: "databricks_notification_destination"}, + {Path: "task.webhook_notifications.on_streaming_backlog_exceeded.id", Resource: "databricks_notification_destination"}, + {Path: "task.email_notifications.on_duration_warning_threshold_exceeded", Resource: "databricks_user", + Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "task.email_notifications.on_failure", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "task.email_notifications.on_start", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "task.email_notifications.on_success", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "task.email_notifications.on_streaming_backlog_exceeded", Resource: "databricks_user", + Match: "user_name", MatchType: MatchCaseInsensitive}, {Path: "run_as.user_name", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "webhook_notifications.on_duration_warning_threshold_exceeded.id", Resource: "databricks_notification_destination"}, + {Path: "webhook_notifications.on_failure.id", Resource: "databricks_notification_destination"}, + {Path: "webhook_notifications.on_start.id", Resource: "databricks_notification_destination"}, + {Path: "webhook_notifications.on_success.id", Resource: "databricks_notification_destination"}, + {Path: "webhook_notifications.on_streaming_backlog_exceeded.id", Resource: "databricks_notification_destination"}, {Path: "email_notifications.on_duration_warning_threshold_exceeded", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, {Path: "email_notifications.on_failure", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, {Path: "email_notifications.on_start", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, {Path: "email_notifications.on_success", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "email_notifications.on_streaming_backlog_exceeded", Resource: "databricks_user", + Match: "user_name", MatchType: MatchCaseInsensitive}, {Path: "task.library.whl", Resource: "databricks_repo", Match: "workspace_path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path", @@ -581,6 +601,21 @@ var resourcesMap map[string]importable = map[string]importable{ ID: task.ExistingClusterId, }) ic.emitLibraries(task.Libraries) + + if task.WebhookNotifications != nil { + ic.emitJobsDestinationNotifications(task.WebhookNotifications.OnFailure) + ic.emitJobsDestinationNotifications(task.WebhookNotifications.OnSuccess) + ic.emitJobsDestinationNotifications(task.WebhookNotifications.OnDurationWarningThresholdExceeded) + ic.emitJobsDestinationNotifications(task.WebhookNotifications.OnStart) + ic.emitJobsDestinationNotifications(task.WebhookNotifications.OnStreamingBacklogExceeded) + } + if task.EmailNotifications != nil { + 
ic.emitListOfUsers(task.EmailNotifications.OnDurationWarningThresholdExceeded) + ic.emitListOfUsers(task.EmailNotifications.OnFailure) + ic.emitListOfUsers(task.EmailNotifications.OnStart) + ic.emitListOfUsers(task.EmailNotifications.OnSuccess) + ic.emitListOfUsers(task.EmailNotifications.OnStreamingBacklogExceeded) + } } for _, jc := range job.JobClusters { ic.importCluster(&jc.NewCluster) @@ -606,6 +641,14 @@ var resourcesMap map[string]importable = map[string]importable{ ic.emitListOfUsers(job.EmailNotifications.OnFailure) ic.emitListOfUsers(job.EmailNotifications.OnStart) ic.emitListOfUsers(job.EmailNotifications.OnSuccess) + ic.emitListOfUsers(job.EmailNotifications.OnStreamingBacklogExceeded) + } + if job.WebhookNotifications != nil { + ic.emitJobsDestinationNotifications(job.WebhookNotifications.OnFailure) + ic.emitJobsDestinationNotifications(job.WebhookNotifications.OnSuccess) + ic.emitJobsDestinationNotifications(job.WebhookNotifications.OnDurationWarningThresholdExceeded) + ic.emitJobsDestinationNotifications(job.WebhookNotifications.OnStart) + ic.emitJobsDestinationNotifications(job.WebhookNotifications.OnStreamingBacklogExceeded) } return ic.importLibraries(r.Data, s) @@ -3312,6 +3355,91 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "parent_path", Resource: "databricks_service_principal"}, }, }, + "databricks_notification_destination": { + WorkspaceLevel: true, + Service: "settings", + Name: func(ic *importContext, d *schema.ResourceData) string { + name := d.Get("display_name").(string) + if name != "" { + name += "_" + } + id := d.Id() + if len(id) >= 8 { + id = id[:8] + } + return nameNormalizationRegex.ReplaceAllString(fmt.Sprintf("%s_%s", name, id), "_") + }, + List: func(ic *importContext) error { + if !ic.meAdmin { + return fmt.Errorf("notifications can be imported only by admin") + } + notifications, err := ic.workspaceClient.NotificationDestinations.ListAll(ic.Context, settings.ListNotificationDestinationsRequest{}) + if err != nil { + return err + } + for _, n := range notifications { + ic.Emit(&resource{ + Resource: "databricks_notification_destination", + ID: n.Id, + }) + } + return nil + }, + Import: func(ic *importContext, r *resource) error { + var notificationDestination tfsettings.NDStruct + s := ic.Resources["databricks_notification_destination"].Schema + common.DataToStructPointer(r.Data, s, ¬ificationDestination) + if notificationDestination.DestinationType == "EMAIL" && notificationDestination.Config != nil && + notificationDestination.Config.Email != nil { + for _, email := range notificationDestination.Config.Email.Addresses { + ic.emitUserOrServicePrincipal(email) + } + } + return nil + }, + ShouldOmitField: func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { + var notificationDestination tfsettings.NDStruct + s := ic.Resources["databricks_notification_destination"].Schema + common.DataToStructPointer(d, s, ¬ificationDestination) + if notificationDestination.Config != nil { + switch notificationDestination.DestinationType { + case "WEBHOOK": + if notificationDestination.Config.GenericWebhook != nil { + switch pathString { + case "config.0.generic_webhook.0.url": + return !notificationDestination.Config.GenericWebhook.UrlSet + case "config.0.generic_webhook.0.username": + return !notificationDestination.Config.GenericWebhook.UsernameSet + case "config.0.generic_webhook.0.password": + return !notificationDestination.Config.GenericWebhook.PasswordSet + } + } + case "SLACK": + if 
notificationDestination.Config.Slack != nil && pathString == "config.0.slack.0.url" { + return !notificationDestination.Config.Slack.UrlSet + } + case "PAGERDUTY": + if notificationDestination.Config.Pagerduty != nil && pathString == "config.0.pagerduty.0.integration_key" { + return !notificationDestination.Config.Pagerduty.IntegrationKeySet + } + case "MICROSOFT_TEAMS": + if notificationDestination.Config.MicrosoftTeams != nil && pathString == "config.0.microsoft_teams.0.url" { + return !notificationDestination.Config.MicrosoftTeams.UrlSet + } + } + } + return defaultShouldOmitFieldFunc(ic, pathString, as, d) + }, + Depends: []reference{ + {Path: "config.email.addresses", Resource: "databricks_user", Match: "user_name", MatchType: MatchCaseInsensitive}, + {Path: "config.microsoft_teams.url", Variable: true}, + {Path: "config.pagerduty.integration_key", Variable: true}, + {Path: "config.generic_webhook.url", Variable: true}, + {Path: "config.generic_webhook.username", Variable: true}, + {Path: "config.generic_webhook.password", Variable: true}, + {Path: "config.slack.url", Variable: true}, + }, + }, "databricks_online_table": { WorkspaceLevel: true, Service: "uc-online-tables", diff --git a/exporter/util.go b/exporter/util.go index a1e210037c..6bd91871ef 100644 --- a/exporter/util.go +++ b/exporter/util.go @@ -26,6 +26,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" + sdk_jobs "github.com/databricks/databricks-sdk-go/service/jobs" "golang.org/x/exp/slices" @@ -1562,3 +1563,12 @@ func isMatchingSecurableTypeAndName(ic *importContext, res *resource, ra *resour ra_name, _ := ra.Get("name") return ra.Type == ("databricks_"+res_securable_type) && ra_name.(string) == res_securable_name } + +func (ic *importContext) emitJobsDestinationNotifications(notifications []sdk_jobs.Webhook) { + for _, notification := range notifications { + ic.Emit(&resource{ + Resource: "databricks_notification_destination", + ID: notification.Id, + }) + } +} From 7342fa5f0baa99ea2e0ef0ad65ad492b863f2e31 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Thu, 8 Aug 2024 11:54:40 +0200 Subject: [PATCH 03/10] [Exporter] Ignore DLT pipelines deployed via DABs (#3857) ## Changes When we deploy resources with DABs it's expected that source code exists outside of the workspace, so we don't need to export it and related resources (libraries). 
Fixes #3436 ## Tests - [x] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [x] using Go SDK --- exporter/importables.go | 33 ++++++++++++++++++++++----------- exporter/importables_test.go | 21 +++++++++++++++++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/exporter/importables.go b/exporter/importables.go index 98b4f8c93e..bc0f695160 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -33,6 +33,7 @@ import ( "github.com/databricks/terraform-provider-databricks/jobs" "github.com/databricks/terraform-provider-databricks/mws" "github.com/databricks/terraform-provider-databricks/permissions" + tfpipelines "github.com/databricks/terraform-provider-databricks/pipelines" "github.com/databricks/terraform-provider-databricks/repos" tfsettings "github.com/databricks/terraform-provider-databricks/settings" tfsharing "github.com/databricks/terraform-provider-databricks/sharing" @@ -2020,7 +2021,7 @@ var resourcesMap map[string]importable = map[string]importable{ return nil }, Import: func(ic *importContext, r *resource) error { - var pipeline pipelines.PipelineSpec + var pipeline tfpipelines.Pipeline s := ic.Resources["databricks_pipeline"].Schema common.DataToStructPointer(r.Data, s, &pipeline) if pipeline.Catalog != "" && pipeline.Target != "" { @@ -2029,15 +2030,17 @@ var resourcesMap map[string]importable = map[string]importable{ ID: pipeline.Catalog + "." + pipeline.Target, }) } - for _, lib := range pipeline.Libraries { - if lib.Notebook != nil { - ic.emitNotebookOrRepo(lib.Notebook.Path) - } - if lib.File != nil { - ic.emitNotebookOrRepo(lib.File.Path) + if pipeline.Deployment == nil || pipeline.Deployment.Kind == "BUNDLE" { + for _, lib := range pipeline.Libraries { + if lib.Notebook != nil { + ic.emitNotebookOrRepo(lib.Notebook.Path) + } + if lib.File != nil { + ic.emitNotebookOrRepo(lib.File.Path) + } + ic.emitIfDbfsFile(lib.Jar) + ic.emitIfDbfsFile(lib.Whl) } - ic.emitIfDbfsFile(lib.Jar) - ic.emitIfDbfsFile(lib.Whl) } // Emit clusters for _, cluster := range pipeline.Clusters { @@ -2090,9 +2093,17 @@ var resourcesMap map[string]importable = map[string]importable{ return defaultShouldOmitFieldFunc(ic, pathString, as, d) }, Ignore: func(ic *importContext, r *resource) bool { - numLibraries := r.Data.Get("library.#").(int) + var pipeline tfpipelines.Pipeline + s := ic.Resources["databricks_pipeline"].Schema + common.DataToStructPointer(r.Data, s, &pipeline) + if pipeline.Deployment != nil && pipeline.Deployment.Kind == "BUNDLE" { + log.Printf("[WARN] Ignoring DLT Pipeline with ID %s as deployed with DABs", r.ID) + ic.addIgnoredResource(fmt.Sprintf("databricks_pipeline. id=%s", r.ID)) + return true + } + numLibraries := len(pipeline.Libraries) if numLibraries == 0 { - log.Printf("[WARN] Ignoring DLT Pipeline with ID %s", r.ID) + log.Printf("[WARN] Ignoring DLT Pipeline with ID %s due to the lack of libraries", r.ID) ic.addIgnoredResource(fmt.Sprintf("databricks_pipeline. 
id=%s", r.ID)) } return numLibraries == 0 diff --git a/exporter/importables_test.go b/exporter/importables_test.go index 1904ab0d1d..61bc06d998 100644 --- a/exporter/importables_test.go +++ b/exporter/importables_test.go @@ -296,11 +296,32 @@ func TestRepoIgnore(t *testing.T) { func TestDLTIgnore(t *testing.T) { ic := importContextForTest() d := dlt_pipelines.ResourcePipeline().ToResource().TestResourceData() + scm := dlt_pipelines.ResourcePipeline().Schema + d.SetId("12345") r := &resource{ID: "12345", Data: d} // job without libraries assert.True(t, resourcesMap["databricks_pipeline"].Ignore(ic, r)) assert.Equal(t, 1, len(ic.ignoredResources)) + + // job deployed by DABs + d.MarkNewResource() + pipeline := dlt_pipelines.Pipeline{ + PipelineSpec: pipelines.PipelineSpec{ + Deployment: &pipelines.PipelineDeployment{ + Kind: "BUNDLE", + }, + }, + } + err := common.StructToData(pipeline, scm, d) + require.NoError(t, err) + + r = &resource{ID: "12345", Data: d} + for k := range ic.ignoredResources { + delete(ic.ignoredResources, k) + } + assert.True(t, resourcesMap["databricks_pipeline"].Ignore(ic, r)) + assert.Equal(t, 1, len(ic.ignoredResources)) } func TestJobsIgnore(t *testing.T) { From fb7e4efe6c4319df8d99a932fecbf7a713e418fd Mon Sep 17 00:00:00 2001 From: vuong-nguyen <44292934+nkvuong@users.noreply.github.com> Date: Fri, 9 Aug 2024 09:16:22 +0100 Subject: [PATCH 04/10] [Fix] Automatically assign `IS_OWNER` permission to sql warehouse if not specified (#3829) ## Changes - SQL warehouses supports specifying `IS_OWNER` permission and therefore requires the same workaround as jobs & pipelines. - Resolves #3730 ## Tests - [x] `make test` run locally - [x] relevant change in `docs/` folder - [x] covered with integration tests in `internal/acceptance` - [x] relevant acceptance tests are passing - [x] using Go SDK --- docs/resources/permissions.md | 4 + permissions/resource_permissions.go | 100 +++++++++++++++------ permissions/resource_permissions_test.go | 108 +++++++++++++++++++++-- 3 files changed, 181 insertions(+), 31 deletions(-) diff --git a/docs/resources/permissions.md b/docs/resources/permissions.md index 27cac375f0..fcd7b69b39 100644 --- a/docs/resources/permissions.md +++ b/docs/resources/permissions.md @@ -623,6 +623,10 @@ resource "databricks_permissions" "token_usage" { [SQL warehouses](https://docs.databricks.com/sql/user/security/access-control/sql-endpoint-acl.html) have four possible permissions: `CAN_USE`, `CAN_MONITOR`, `CAN_MANAGE` and `IS_OWNER`: +- The creator of a warehouse has `IS_OWNER` permission. Destroying `databricks_permissions` resource for a warehouse would revert ownership to the creator. +- A warehouse must have exactly one owner. If a resource is changed and no owner is specified, the currently authenticated principal would become the new owner of the warehouse. Nothing would change, per se, if the warehouse was created through Terraform. +- A warehouse cannot have a group as an owner. 
+ ```hcl data "databricks_current_user" "me" {} diff --git a/permissions/resource_permissions.go b/permissions/resource_permissions.go index 0ad074a7be..897a8f2d57 100644 --- a/permissions/resource_permissions.go +++ b/permissions/resource_permissions.go @@ -133,6 +133,56 @@ func (a PermissionsAPI) shouldExplicitlyGrantCallingUserManagePermissions(object return isDbsqlPermissionsWorkaroundNecessary(objectID) } +func isOwnershipWorkaroundNecessary(objectID string) bool { + return strings.HasPrefix(objectID, "/jobs") || strings.HasPrefix(objectID, "/pipelines") || strings.HasPrefix(objectID, "/sql/warehouses") +} + +// Suppress the error if it is 404 +func ignoreNotFound(err error) error { + var apiErr *apierr.APIError + if !errors.As(err, &apiErr) { + return err + } + if apiErr.StatusCode == 404 { + return nil + } + if strings.Contains(apiErr.Message, "does not exist.") { + return nil + } + return err +} + +func (a PermissionsAPI) getObjectCreator(objectID string) (string, error) { + w, err := a.client.WorkspaceClient() + if err != nil { + return "", err + } + if strings.HasPrefix(objectID, "/jobs") { + jobId, err := strconv.ParseInt(strings.ReplaceAll(objectID, "/jobs/", ""), 10, 64) + if err != nil { + return "", err + } + job, err := w.Jobs.GetByJobId(a.context, jobId) + if err != nil { + return "", ignoreNotFound(err) + } + return job.CreatorUserName, nil + } else if strings.HasPrefix(objectID, "/pipelines") { + pipeline, err := w.Pipelines.GetByPipelineId(a.context, strings.ReplaceAll(objectID, "/pipelines/", "")) + if err != nil { + return "", ignoreNotFound(err) + } + return pipeline.CreatorUserName, nil + } else if strings.HasPrefix(objectID, "/sql/warehouses") { + warehouse, err := w.Warehouses.GetById(a.context, strings.ReplaceAll(objectID, "/sql/warehouses/", "")) + if err != nil { + return "", ignoreNotFound(err) + } + return warehouse.CreatorName, nil + } + return "", nil +} + func (a PermissionsAPI) ensureCurrentUserCanManageObject(objectID string, objectACL AccessControlChangeList) (AccessControlChangeList, error) { if !a.shouldExplicitlyGrantCallingUserManagePermissions(objectID) { return objectACL, nil @@ -168,6 +218,19 @@ func (a PermissionsAPI) put(objectID string, objectACL AccessControlChangeList) return a.client.Put(a.context, urlPathForObjectID(objectID), objectACL) } +// safePutWithOwner is a workaround for the limitation where warehouse without owners cannot have IS_OWNER set +func (a PermissionsAPI) safePutWithOwner(objectID string, objectACL AccessControlChangeList, originalAcl []AccessControlChange) error { + err := a.put(objectID, objectACL) + if err != nil { + if strings.Contains(err.Error(), "with no existing owner must provide a new owner") { + objectACL.AccessControlList = originalAcl + return a.put(objectID, objectACL) + } + return err + } + return nil +} + // Update updates object permissions. 
Technically, it's using method named SetOrDelete, but here we do more func (a PermissionsAPI) Update(objectID string, objectACL AccessControlChangeList) error { if objectID == "/authorization/tokens" || objectID == "/registered-models/root" || objectID == "/directories/0" { @@ -177,7 +240,9 @@ func (a PermissionsAPI) Update(objectID string, objectACL AccessControlChangeLis PermissionLevel: "CAN_MANAGE", }) } - if strings.HasPrefix(objectID, "/jobs") || strings.HasPrefix(objectID, "/pipelines") { + originalAcl := make([]AccessControlChange, len(objectACL.AccessControlList)) + _ = copy(originalAcl, objectACL.AccessControlList) + if isOwnershipWorkaroundNecessary(objectID) { owners := 0 for _, acl := range objectACL.AccessControlList { if acl.PermissionLevel == "IS_OWNER" { @@ -200,7 +265,7 @@ func (a PermissionsAPI) Update(objectID string, objectACL AccessControlChangeLis }) } } - return a.put(objectID, objectACL) + return a.safePutWithOwner(objectID, objectACL, originalAcl) } // Delete gracefully removes permissions. Technically, it's using method named SetOrDelete, but here we do more @@ -218,37 +283,22 @@ func (a PermissionsAPI) Delete(objectID string) error { } } } - w, err := a.client.WorkspaceClient() - if err != nil { - return err - } - if strings.HasPrefix(objectID, "/jobs") { - jobId, err := strconv.ParseInt(strings.ReplaceAll(objectID, "/jobs/", ""), 10, 64) + originalAcl := make([]AccessControlChange, len(accl.AccessControlList)) + _ = copy(originalAcl, accl.AccessControlList) + if isOwnershipWorkaroundNecessary(objectID) { + creator, err := a.getObjectCreator(objectID) if err != nil { return err } - job, err := w.Jobs.GetByJobId(a.context, jobId) - if err != nil { - if strings.HasSuffix(err.Error(), " does not exist.") { - return nil - } - return err - } - accl.AccessControlList = append(accl.AccessControlList, AccessControlChange{ - UserName: job.CreatorUserName, - PermissionLevel: "IS_OWNER", - }) - } else if strings.HasPrefix(objectID, "/pipelines") { - job, err := w.Pipelines.GetByPipelineId(a.context, strings.ReplaceAll(objectID, "/pipelines/", "")) - if err != nil { - return err + if creator == "" { + return nil } accl.AccessControlList = append(accl.AccessControlList, AccessControlChange{ - UserName: job.CreatorUserName, + UserName: creator, PermissionLevel: "IS_OWNER", }) } - return a.put(objectID, accl) + return a.safePutWithOwner(objectID, accl, originalAcl) } // Read gets all relevant permissions for the object, including inherited ones diff --git a/permissions/resource_permissions_test.go b/permissions/resource_permissions_test.go index 80205924a6..b01fddb1ca 100644 --- a/permissions/resource_permissions_test.go +++ b/permissions/resource_permissions_test.go @@ -410,7 +410,7 @@ func TestResourcePermissionsRead_NotFound(t *testing.T) { { Method: http.MethodGet, Resource: "/api/2.0/permissions/clusters/abc", - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "NOT_FOUND", Message: "Cluster does not exist", }, @@ -432,7 +432,7 @@ func TestResourcePermissionsRead_some_error(t *testing.T) { { Method: http.MethodGet, Resource: "/api/2.0/permissions/clusters/abc", - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "INVALID_REQUEST", Message: "Internal error happened", }, @@ -498,7 +498,7 @@ func TestResourcePermissionsRead_ErrorOnScimMe(t *testing.T) { { Method: http.MethodGet, Resource: "/api/2.0/preview/scim/v2/Me", - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "INVALID_REQUEST", Message: "Internal error 
happened", }, @@ -652,7 +652,7 @@ func TestResourcePermissionsDelete_error(t *testing.T) { }, }, }, - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "INVALID_REQUEST", Message: "Internal error happened", }, @@ -860,6 +860,10 @@ func TestResourcePermissionsCreate_SQLA_Endpoint(t *testing.T) { UserName: TestingUser, PermissionLevel: "CAN_USE", }, + { + UserName: TestingAdminUser, + PermissionLevel: "IS_OWNER", + }, { UserName: TestingAdminUser, PermissionLevel: "CAN_MANAGE", @@ -878,6 +882,98 @@ func TestResourcePermissionsCreate_SQLA_Endpoint(t *testing.T) { UserName: TestingUser, PermissionLevel: "CAN_USE", }, + { + UserName: TestingAdminUser, + PermissionLevel: "IS_OWNER", + }, + { + UserName: TestingAdminUser, + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + }, + }, + Resource: ResourcePermissions(), + State: map[string]any{ + "sql_endpoint_id": "abc", + "access_control": []any{ + map[string]any{ + "user_name": TestingUser, + "permission_level": "CAN_USE", + }, + }, + }, + Create: true, + }.Apply(t) + assert.NoError(t, err) + ac := d.Get("access_control").(*schema.Set) + require.Equal(t, 1, len(ac.List())) + firstElem := ac.List()[0].(map[string]any) + assert.Equal(t, TestingUser, firstElem["user_name"]) + assert.Equal(t, "CAN_USE", firstElem["permission_level"]) +} + +func TestResourcePermissionsCreate_SQLA_Endpoint_WithOwnerError(t *testing.T) { + d, err := qa.ResourceFixture{ + Fixtures: []qa.HTTPFixture{ + me, + { + Method: "PUT", + Resource: "/api/2.0/permissions/sql/warehouses/abc", + ExpectedRequest: AccessControlChangeList{ + AccessControlList: []AccessControlChange{ + { + UserName: TestingUser, + PermissionLevel: "CAN_USE", + }, + { + UserName: TestingAdminUser, + PermissionLevel: "IS_OWNER", + }, + { + UserName: TestingAdminUser, + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + Response: apierr.APIError{ + ErrorCode: "INVALID_PARAMETER_VALUE", + Message: "PUT requests for warehouse *** with no existing owner must provide a new owner.", + }, + Status: 400, + }, + { + Method: "PUT", + Resource: "/api/2.0/permissions/sql/warehouses/abc", + ExpectedRequest: AccessControlChangeList{ + AccessControlList: []AccessControlChange{ + { + UserName: TestingUser, + PermissionLevel: "CAN_USE", + }, + { + UserName: TestingAdminUser, + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + }, + { + Method: http.MethodGet, + Resource: "/api/2.0/permissions/sql/warehouses/abc", + Response: ObjectACL{ + ObjectID: "dashboards/abc", + ObjectType: "dashboard", + AccessControlList: []AccessControl{ + { + UserName: TestingUser, + PermissionLevel: "CAN_USE", + }, + { + UserName: TestingAdminUser, + PermissionLevel: "IS_OWNER", + }, { UserName: TestingAdminUser, PermissionLevel: "CAN_MANAGE", @@ -1003,7 +1099,7 @@ func TestResourcePermissionsCreate_NotebookPath_NotExists(t *testing.T) { { Method: http.MethodGet, Resource: "/api/2.0/workspace/get-status?path=%2FDevelopment%2FInit", - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "INVALID_REQUEST", Message: "Internal error happened", }, @@ -1181,7 +1277,7 @@ func TestResourcePermissionsCreate_error(t *testing.T) { { Method: http.MethodPut, Resource: "/api/2.0/permissions/clusters/abc", - Response: common.APIErrorBody{ + Response: apierr.APIError{ ErrorCode: "INVALID_REQUEST", Message: "Internal error happened", }, From 0564da95657c479a8d7c8a7ee10b0fea03ec12ad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:32:55 +0200 Subject: [PATCH 
05/10] [Dependency] Bump github.com/zclconf/go-cty from 1.14.4 to 1.15.0 (#3775) Bumps [github.com/zclconf/go-cty](https://github.com/zclconf/go-cty) from 1.14.4 to 1.15.0.
**Changelog**

*Sourced from github.com/zclconf/go-cty's changelog.*

1.15.0 (July 15, 2024)

- `function/stdlib`: The `element` function now accepts negative indices, extending the illusion of an infinitely-long list into the negative direction too (a short sketch follows this list).
- `cty`: The various "transform" functions were previously incorrectly propagating marks from a parent object down to attribute values when calling the caller-provided transform functions. The marks will no longer be propagated downwards, which is consistent with the treatment of collection and tuple elements. If your transform function needs to take into account context about the marks of upstream containers, you can maintain a stack of active marks in your `Transformer` implementation, pushing in `Enter` and popping in `Exit` (#185); see the second sketch, after the commit list.
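For context, a minimal sketch (not part of the upstream changelog or this patch) of the new `element` behavior, assuming the `stdlib.Element` convenience wrapper from `function/stdlib`:

```go
package main

import (
	"fmt"

	"github.com/zclconf/go-cty/cty"
	"github.com/zclconf/go-cty/cty/function/stdlib"
)

func main() {
	list := cty.ListVal([]cty.Value{
		cty.StringVal("a"), cty.StringVal("b"), cty.StringVal("c"),
	})
	// With the wrap-around ("infinitely-long list") semantics extended to
	// negative indices, index -1 should resolve to the last element,
	// just as index 3 wraps back around to the first.
	v, err := stdlib.Element(list, cty.NumberIntVal(-1))
	if err != nil {
		panic(err)
	}
	fmt.Println(v.AsString()) // expected: "c"
}
```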
**Commits**

- `d4f2633` v1.15.0 release
- `0cccfb9` Update CHANGELOG.md
- `043bf38` cty: use unmarked val when `Transform` walks object attrs
- `7b73cce` function/stdlib: `element` accepts negative indices
- `15a9d85` Prepare for a possible future v1.14.5 release
- See full diff in compare view
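For the mark-propagation change above, a minimal, hypothetical sketch of the suggested pattern: keeping a stack of active marks inside a `Transformer`, pushed in `Enter` and popped in `Exit`. The `markStack` type is illustrative only, and it assumes the `cty.TransformWithTransformer` entry point:

```go
package main

import "github.com/zclconf/go-cty/cty"

// markStack tracks the marks of enclosing containers itself, since the
// transform functions no longer push a parent's marks down onto
// attribute values.
type markStack struct {
	stack []cty.ValueMarks
}

func (t *markStack) Enter(p cty.Path, v cty.Value) (cty.Value, error) {
	_, marks := v.Unmark()
	t.stack = append(t.stack, marks) // push the marks of the value we enter
	return v, nil
}

func (t *markStack) Exit(p cty.Path, v cty.Value) (cty.Value, error) {
	t.stack = t.stack[:len(t.stack)-1] // pop on the way back out
	return v, nil
}

func main() {
	val := cty.ObjectVal(map[string]cty.Value{
		"name": cty.StringVal("x"),
	}).Mark("sensitive")
	// Enter/Exit are driven for every nested value during the walk.
	_, _ = cty.TransformWithTransformer(val, &markStack{})
}
```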

Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4317c1c65c..ccdb138a9e 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/hashicorp/terraform-plugin-log v0.9.0 github.com/hashicorp/terraform-plugin-sdk/v2 v2.34.0 github.com/stretchr/testify v1.9.0 - github.com/zclconf/go-cty v1.14.4 + github.com/zclconf/go-cty v1.15.0 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 ) diff --git a/go.sum b/go.sum index 04d92c2cab..baf1f46f81 100644 --- a/go.sum +++ b/go.sum @@ -204,8 +204,8 @@ github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/zclconf/go-cty v1.14.4 h1:uXXczd9QDGsgu0i/QFR/hzI5NYCHLf6NQw/atrbnhq8= -github.com/zclconf/go-cty v1.14.4/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= +github.com/zclconf/go-cty v1.15.0 h1:tTCRWxsexYUmtt/wVxgDClUe+uQusuI443uL6e+5sXQ= +github.com/zclconf/go-cty v1.15.0/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= github.com/zclconf/go-cty-debug v0.0.0-20240509010212-0d6042c53940 h1:4r45xpDWB6ZMSMNJFMOjqrGHynW3DIBuR2H9j0ug+Mo= github.com/zclconf/go-cty-debug v0.0.0-20240509010212-0d6042c53940/go.mod h1:CmBdvvj3nqzfzJ6nTCIwDTPZ56aVGvDrmztiO5g3qrM= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= From a9293e9df6632db23c62c3f08d7f1031b636fcee Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Fri, 9 Aug 2024 18:52:48 +0200 Subject: [PATCH 06/10] [Internal] Refactor exporter: split huge files into smaller ones (#3870) ## Changes This PR splits `context.go` and `util.go` into smaller files, filled with related functions, like, utils for workspace objects, utils for SCIM (users/sps/groups), ... 
## Tests - [x] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [ ] using Go SDK --- exporter/codegen.go | 1060 +++++++++++++++++++++++++++++++++++ exporter/context.go | 1070 ------------------------------------ exporter/context_test.go | 8 +- exporter/util.go | 1006 +-------------------------------- exporter/util_compute.go | 228 ++++++++ exporter/util_scim.go | 392 +++++++++++++ exporter/util_workspace.go | 400 ++++++++++++++ 7 files changed, 2098 insertions(+), 2066 deletions(-) create mode 100644 exporter/codegen.go create mode 100644 exporter/util_compute.go create mode 100644 exporter/util_scim.go create mode 100644 exporter/util_workspace.go diff --git a/exporter/codegen.go b/exporter/codegen.go new file mode 100644 index 0000000000..dee6f12fd4 --- /dev/null +++ b/exporter/codegen.go @@ -0,0 +1,1060 @@ +package exporter + +import ( + "encoding/json" + "errors" + "fmt" + "log" + "os" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/databricks/terraform-provider-databricks/workspace" + "github.com/hashicorp/hcl/v2" + "github.com/hashicorp/hcl/v2/hclsyntax" + "github.com/hashicorp/hcl/v2/hclwrite" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" + "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" + "github.com/zclconf/go-cty/cty" +) + +// TODO: move to IC +var dependsRe = regexp.MustCompile(`(\.[\d]+)`) + +func (ic *importContext) generateVariableName(attrName, name string) string { + return fmt.Sprintf("%s_%s", attrName, name) +} + +func maybeAddQuoteCharacter(s string) string { + s = strings.ReplaceAll(s, "\\", "\\\\") + s = strings.ReplaceAll(s, "\"", "\\\"") + return s +} + +func genTraversalTokens(sr *resourceApproximation, pick string) hcl.Traversal { + if sr.Mode == "data" { + return hcl.Traversal{ + hcl.TraverseRoot{Name: "data"}, + hcl.TraverseAttr{Name: sr.Type}, + hcl.TraverseAttr{Name: sr.Name}, + hcl.TraverseAttr{Name: pick}, + } + } + return hcl.Traversal{ + hcl.TraverseRoot{Name: sr.Type}, + hcl.TraverseAttr{Name: sr.Name}, + hcl.TraverseAttr{Name: pick}, + } +} + +func (ic *importContext) isIgnoredResourceApproximation(ra *resourceApproximation) bool { + var ignored bool + if ra != nil && ra.Resource != nil { + ignoreFunc := ic.Importables[ra.Type].Ignore + if ignoreFunc != nil && ignoreFunc(ic, ra.Resource) { + log.Printf("[WARN] Found reference to the ignored resource %s: %s", ra.Type, ra.Name) + return true + } + } + return ignored +} + +func (ic *importContext) Find(value, attr string, ref reference, origResource *resource, origPath string) (string, hcl.Traversal, bool) { + log.Printf("[DEBUG] Starting searching for reference for resource %s, attr='%s', value='%s', ref=%v", + ref.Resource, attr, value, ref) + // optimize performance by avoiding doing regexp matching multiple times + matchValue := "" + switch ref.MatchType { + case MatchRegexp: + if ref.Regexp == nil { + log.Printf("[WARN] you must provide regular expression for 'regexp' match type") + return "", nil, false + } + res := ref.Regexp.FindStringSubmatch(value) + if len(res) < 2 { + log.Printf("[WARN] no match for regexp: %v in string %s", ref.Regexp, value) + return "", nil, false + } + matchValue = res[1] + case MatchCaseInsensitive: + matchValue = strings.ToLower(value) // performance optimization to avoid doing it in the loop + case MatchExact, MatchDefault: + matchValue = value + case MatchPrefix, MatchLongestPrefix: + 
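+ // for MatchPrefix/MatchLongestPrefix an optional transform can normalize the value first; the actual prefix scan happens in the state iteration below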
if ref.MatchValueTransformFunc != nil { + matchValue = ref.MatchValueTransformFunc(value) + } else { + matchValue = value + } + } + // doing explicit lookup in the state. For case insensitive matches, first attempt to lookup for the value, + // and do iteration if it's not found + if (ref.MatchType == MatchExact || ref.MatchType == MatchDefault || ref.MatchType == MatchRegexp || + ref.MatchType == MatchCaseInsensitive) && !ref.SkipDirectLookup { + sr := ic.State.Get(ref.Resource, attr, matchValue) + if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) && + !ic.isIgnoredResourceApproximation(sr) { + log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", + ref.Resource, attr, value, ref, sr.Type, sr.Name) + return matchValue, genTraversalTokens(sr, attr), sr.Mode == "data" + } + if ref.MatchType != MatchCaseInsensitive { // for case-insensitive matching we'll try iteration + log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Not found", + ref.Resource, attr, value, ref) + return "", nil, false + } + } else if ref.MatchType == MatchLongestPrefix && ref.ExtraLookupKey != "" { + extraKeyValue, exists := origResource.GetExtraData(ref.ExtraLookupKey) + if exists && extraKeyValue.(string) != "" { + sr := ic.State.Get(ref.Resource, attr, extraKeyValue.(string)) + if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) && + !ic.isIgnoredResourceApproximation(sr) { + log.Printf("[DEBUG] Finished direct lookup by key %s for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", + ref.ExtraLookupKey, ref.Resource, attr, value, ref, sr.Type, sr.Name) + return extraKeyValue.(string), genTraversalTokens(sr, attr), sr.Mode == "data" + } + } + } + + maxPrefixLen := 0 + maxPrefixOrigValue := "" + var maxPrefixResource *resourceApproximation + srs := *ic.State.Resources(ref.Resource) + for _, sr := range srs { + for _, i := range sr.Instances { + v := i.Attributes[attr] + if v == nil { + log.Printf("[WARN] Can't find instance attribute '%v' in resource: '%v'", attr, ref.Resource) + continue + } + strValue := v.(string) + origValue := strValue + if ref.SearchValueTransformFunc != nil { + strValue = ref.SearchValueTransformFunc(strValue) + log.Printf("[TRACE] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue) + } + matched := false + switch ref.MatchType { + case MatchCaseInsensitive: + matched = (strings.ToLower(strValue) == matchValue) + case MatchPrefix: + matched = strings.HasPrefix(matchValue, strValue) + case MatchLongestPrefix: + if strings.HasPrefix(matchValue, strValue) && len(origValue) > maxPrefixLen && !ic.isIgnoredResourceApproximation(sr) { + maxPrefixLen = len(origValue) + maxPrefixOrigValue = origValue + maxPrefixResource = sr + } + case MatchExact, MatchDefault: + matched = (strValue == matchValue) + default: + log.Printf("[WARN] Unsupported match type: %s", ref.MatchType) + } + if !matched || (ref.IsValidApproximation != nil && !ref.IsValidApproximation(ic, origResource, sr, origPath)) || + ic.isIgnoredResourceApproximation(sr) { + continue + } + log.Printf("[DEBUG] Finished searching for reference for resource %s, attr='%s', value='%s', ref=%v. 
Found: type=%s name=%s", + ref.Resource, attr, value, ref, sr.Type, sr.Name) + return origValue, genTraversalTokens(sr, attr), sr.Mode == "data" + } + } + if ref.MatchType == MatchLongestPrefix && maxPrefixResource != nil && + (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, maxPrefixResource, origPath)) && + !ic.isIgnoredResourceApproximation(maxPrefixResource) { + log.Printf("[DEBUG] Finished searching longest prefix for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", + ref.Resource, attr, value, ref, maxPrefixResource.Type, maxPrefixResource.Name) + return maxPrefixOrigValue, genTraversalTokens(maxPrefixResource, attr), maxPrefixResource.Mode == "data" + } + log.Printf("[DEBUG] Finished searching for reference for resource %s, pick=%s, ref=%v. Not found", ref.Resource, attr, ref) + return "", nil, false +} + +func (ic *importContext) getTraversalTokens(ref reference, value string, origResource *resource, origPath string) (hclwrite.Tokens, bool) { + matchType := ref.MatchTypeValue() + attr := ref.MatchAttribute() + attrValue, traversal, isData := ic.Find(value, attr, ref, origResource, origPath) + // at least one invocation of ic.Find will assign Nil to traversal if resource with value is not found + if traversal == nil { + return nil, isData + } + // capture if it's data? + switch matchType { + case MatchExact, MatchDefault, MatchCaseInsensitive: + return hclwrite.TokensForTraversal(traversal), isData + case MatchPrefix, MatchLongestPrefix: + rest := value[len(attrValue):] + tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"', '$', '{'}}} + tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}}) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(rest))}) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}) + return tokens, isData + case MatchRegexp: + indices := ref.Regexp.FindStringSubmatchIndex(value) + if len(indices) == 4 { + tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"'}}} + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[0:indices[2]]))}) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'$', '{'}}) + tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}}) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[indices[3]:]))}) + tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}) + return tokens, isData + } + log.Printf("[WARN] Can't match found data in '%s'. Indices: %v", value, indices) + default: + log.Printf("[WARN] Unsupported match type: %s", ref.MatchType) + } + return nil, false +} + +func (ic *importContext) reference(i importable, path []string, value string, ctyValue cty.Value, origResource *resource) hclwrite.Tokens { + pathString := strings.Join(path, ".") + match := dependsRe.ReplaceAllString(pathString, "") + // get reference candidate, but if it's a `data`, then look for another non-data reference if possible.. 
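+ // a traversal that resolves to a data source is remembered in dataTokens and used only as a fallback when no managed resource matches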
+ var dataTokens hclwrite.Tokens + for _, d := range i.Depends { + if d.Path != match { + continue + } + if d.File { + relativeFile := fmt.Sprintf("${path.module}/%s", value) + return hclwrite.Tokens{ + &hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"'}}, + &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(relativeFile)}, + &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}, + } + } + if d.Variable { + varName := ic.generateVariableName(path[0], value) + return ic.variable(varName, "") + } + + tokens, isData := ic.getTraversalTokens(d, value, origResource, pathString) + if tokens != nil { + if isData { + dataTokens = tokens + log.Printf("[DEBUG] Got reference to data for dependency %v", d) + } else { + return tokens + } + } + } + if len(dataTokens) > 0 { + return dataTokens + } + return hclwrite.TokensForValue(ctyValue) +} + +func (ic *importContext) variable(name, desc string) hclwrite.Tokens { + ic.variablesLock.Lock() + ic.variables[name] = desc + ic.variablesLock.Unlock() + return hclwrite.TokensForTraversal(hcl.Traversal{ + hcl.TraverseRoot{Name: "var"}, + hcl.TraverseAttr{Name: name}, + }) +} + +type fieldTuple struct { + Field string + Schema *schema.Schema +} + +func (ic *importContext) dataToHcl(i importable, path []string, + pr *schema.Resource, res *resource, body *hclwrite.Body) error { + d := res.Data + ss := []fieldTuple{} + for a, as := range pr.Schema { + ss = append(ss, fieldTuple{a, as}) + } + sort.Slice(ss, func(i, j int) bool { + // it just happens that reverse field order + // makes the most beautiful configs + return ss[i].Field > ss[j].Field + }) + var_cnt := 0 + for _, tuple := range ss { + a, as := tuple.Field, tuple.Schema + pathString := strings.Join(append(path, a), ".") + raw, nonZero := d.GetOk(pathString) + // log.Printf("[DEBUG] path=%s, raw='%v'", pathString, raw) + if i.ShouldOmitField == nil { // we don't have custom function, so skip computed & default fields + if defaultShouldOmitFieldFunc(ic, pathString, as, d) { + continue + } + } else if i.ShouldOmitField(ic, pathString, as, d) { + continue + } + mpath := dependsRe.ReplaceAllString(pathString, "") + for _, ref := range i.Depends { + if ref.Path == mpath && ref.Variable { + // sensitive fields are moved to variable depends, variable name is normalized + // TODO: handle a case when we have multiple blocks, so names won't be unique + raw = ic.regexFix(ic.ResourceName(res), simpleNameFixes) + if var_cnt > 0 { + raw = fmt.Sprintf("%s_%d", raw, var_cnt) + } + nonZero = true + var_cnt++ + } + } + shouldSkip := !nonZero + if as.Required { // for required fields we must produce a value, even empty... 
+ shouldSkip = false + } else if as.Default != nil && !reflect.DeepEqual(raw, as.Default) { + // In case when have zero value, but there is non-zero default, we also need to produce it + shouldSkip = false + } + if shouldSkip && (i.ShouldGenerateField == nil || !i.ShouldGenerateField(ic, pathString, as, d)) { + continue + } + switch as.Type { + case schema.TypeString: + value := raw.(string) + tokens := ic.reference(i, append(path, a), value, cty.StringVal(value), res) + body.SetAttributeRaw(a, tokens) + case schema.TypeBool: + body.SetAttributeValue(a, cty.BoolVal(raw.(bool))) + case schema.TypeInt: + var num int64 + switch iv := raw.(type) { + case int: + num = int64(iv) + case int32: + num = int64(iv) + case int64: + num = iv + } + body.SetAttributeRaw(a, ic.reference(i, append(path, a), + strconv.FormatInt(num, 10), cty.NumberIntVal(num), res)) + case schema.TypeFloat: + body.SetAttributeValue(a, cty.NumberFloatVal(raw.(float64))) + case schema.TypeMap: + // TODO: Resolve references in maps as well, and also support different types inside map... + ov := map[string]cty.Value{} + for key, iv := range raw.(map[string]any) { + v := cty.StringVal(fmt.Sprintf("%v", iv)) + ov[key] = v + } + body.SetAttributeValue(a, cty.ObjectVal(ov)) + case schema.TypeSet: + if rawSet, ok := raw.(*schema.Set); ok { + rawList := rawSet.List() + err := ic.readListFromData(i, append(path, a), res, rawList, body, as, func(i int) string { + return strconv.Itoa(rawSet.F(rawList[i])) + }) + if err != nil { + return err + } + } + case schema.TypeList: + if rawList, ok := raw.([]any); ok { + err := ic.readListFromData(i, append(path, a), res, rawList, body, as, strconv.Itoa) + if err != nil { + return err + } + } + default: + return fmt.Errorf("unsupported schema type: %v", path) + } + } + // Generate `depends_on` only for top-level resource because `dataToHcl` is called recursively + if len(path) == 0 && len(res.DependsOn) > 0 { + notIgnoredResources := []*resource{} + for _, dr := range res.DependsOn { + dr := dr + if dr.Data == nil { + tdr := ic.Scope.FindById(dr.Resource, dr.ID) + if tdr == nil { + log.Printf("[WARN] can't find resource %s in scope", dr) + continue + } + dr = tdr + } + if ic.Importables[dr.Resource].Ignore == nil || !ic.Importables[dr.Resource].Ignore(ic, dr) { + found := false + for _, v := range notIgnoredResources { + if v.ID == dr.ID && v.Resource == dr.Resource { + found = true + break + } + } + if !found { + notIgnoredResources = append(notIgnoredResources, dr) + } + } + } + if len(notIgnoredResources) > 0 { + toks := hclwrite.Tokens{} + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenOBrack, + Bytes: []byte{'['}, + }) + for i, dr := range notIgnoredResources { + if i > 0 { + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenComma, + Bytes: []byte{','}, + }) + } + toks = append(toks, hclwrite.TokensForTraversal(hcl.Traversal{ + hcl.TraverseRoot{Name: dr.Resource}, + hcl.TraverseAttr{Name: ic.ResourceName(dr)}, + })...) 
+ } + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenCBrack, + Bytes: []byte{']'}, + }) + body.SetAttributeRaw("depends_on", toks) + } + } + return nil +} + +func (ic *importContext) readListFromData(i importable, path []string, res *resource, + rawList []any, body *hclwrite.Body, as *schema.Schema, offsetConverter func(i int) string) error { + if len(rawList) == 0 { + return nil + } + name := path[len(path)-1] + switch elem := as.Elem.(type) { + case *schema.Resource: + if as.MaxItems == 1 { + nestedPath := append(path, offsetConverter(0)) + confBlock := body.AppendNewBlock(name, []string{}) + return ic.dataToHcl(i, nestedPath, elem, res, confBlock.Body()) + } + for offset := range rawList { + confBlock := body.AppendNewBlock(name, []string{}) + nestedPath := append(path, offsetConverter(offset)) + err := ic.dataToHcl(i, nestedPath, elem, res, confBlock.Body()) + if err != nil { + return err + } + } + case *schema.Schema: + toks := hclwrite.Tokens{} + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenOBrack, + Bytes: []byte{'['}, + }) + for _, raw := range rawList { + if len(toks) != 1 { + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenComma, + Bytes: []byte{','}, + }) + } + switch x := raw.(type) { + case string: + value := raw.(string) + toks = append(toks, ic.reference(i, path, value, cty.StringVal(value), res)...) + case int: + // probably we don't even use integer lists?... + toks = append(toks, hclwrite.TokensForValue( + cty.NumberIntVal(int64(x)))...) + default: + return fmt.Errorf("unsupported primitive list: %#v", path) + } + } + toks = append(toks, &hclwrite.Token{ + Type: hclsyntax.TokenCBrack, + Bytes: []byte{']'}, + }) + body.SetAttributeRaw(name, toks) + } + return nil +} + +func (ic *importContext) generateTfvars() error { + // TODO: make it incremental as well... + if len(ic.tfvars) == 0 { + return nil + } + f := hclwrite.NewEmptyFile() + body := f.Body() + fileName := fmt.Sprintf("%s/terraform.tfvars", ic.Directory) + + vf, err := os.Create(fileName) + if err != nil { + return err + } + defer vf.Close() + + for k, v := range ic.tfvars { + body.SetAttributeValue(k, cty.StringVal(v)) + } + // nolint + vf.Write(f.Bytes()) + log.Printf("[INFO] Written %d tfvars", len(ic.tfvars)) + + ic.generateGitIgnore() + + return nil +} + +func (ic *importContext) generateVariables() error { + if len(ic.variables) == 0 { + return nil + } + f := hclwrite.NewEmptyFile() + body := f.Body() + fileName := fmt.Sprintf("%s/vars.tf", ic.Directory) + if ic.incremental { + content, err := os.ReadFile(fileName) + if err == nil { + ftmp, diags := hclwrite.ParseConfig(content, fileName, hcl.Pos{Line: 1, Column: 1}) + if diags.HasErrors() { + log.Printf("[ERROR] parsing of existing file failed: %s", diags) + } else { + tbody := ftmp.Body() + for _, block := range tbody.Blocks() { + typ := block.Type() + labels := block.Labels() + log.Printf("[DEBUG] blockBody: %v %v\n", typ, labels) + _, present := ic.variables[labels[0]] + if typ == "variable" && present { + log.Printf("[DEBUG] Ignoring variable '%s' that will be re-exported", labels[0]) + } else { + log.Printf("[DEBUG] Adding not exported object. 
type='%s', labels=%v", typ, labels) + body.AppendBlock(block) + } + } + } + } else { + log.Printf("[ERROR] opening file %s", fileName) + } + } + vf, err := os.Create(fileName) + if err != nil { + return err + } + defer vf.Close() + + for k, v := range ic.variables { + b := body.AppendNewBlock("variable", []string{k}).Body() + b.SetAttributeValue("description", cty.StringVal(v)) + } + // nolint + vf.Write(f.Bytes()) + log.Printf("[INFO] Written %d variables", len(ic.variables)) + return nil +} + +func (ic *importContext) generateGitIgnore() { + fileName := fmt.Sprintf("%s/.gitignore", ic.Directory) + vf, err := os.Create(fileName) + if err != nil { + log.Printf("[ERROR] can't create %s: %v", fileName, err) + return + } + defer vf.Close() + // nolint + vf.Write([]byte("terraform.tfvars\n")) +} + +func (ic *importContext) generateAndWriteResources(sh *os.File) { + resources := ic.Scope.Sorted() + scopeSize := ic.Scope.Len() + t1 := time.Now() + log.Printf("[INFO] Generating configuration for %d resources", scopeSize) + + // make configurable via environment variables + resourceHandlersNumber := getEnvAsInt("EXPORTER_RESOURCE_GENERATORS", 50) + resourcesChan := make(resourceChannel, defaultChannelSize) + + resourceWriters := make(map[string]dataWriteChannel, len(ic.Resources)) + for service := range ic.services { + resourceWriters[service] = make(dataWriteChannel, defaultChannelSize) + } + writersWaitGroup := &sync.WaitGroup{} + // write shell script for importing + shellImportChan := make(importWriteChannel, defaultChannelSize) + writersWaitGroup.Add(1) + go func() { + ic.writeShellImports(sh, shellImportChan) + writersWaitGroup.Done() + }() + // + nativeImportChan := make(importWriteChannel, defaultChannelSize) + writersWaitGroup.Add(1) + go func() { + ic.writeNativeImports(nativeImportChan) + writersWaitGroup.Done() + }() + // start resource handlers + for i := 0; i < resourceHandlersNumber; i++ { + i := i + go func() { + log.Printf("[DEBUG] Starting resource handler %d", i) + ic.processSingleResource(resourcesChan, resourceWriters, nativeImportChan) + }() + } + // start writers for specific services + for service, ch := range resourceWriters { + service := service + ch := ch + generatedFile := fmt.Sprintf("%s/%s.tf", ic.Directory, service) + log.Printf("[DEBUG] starting writer for service %s", service) + writersWaitGroup.Add(1) + go func() { + ic.handleResourceWrite(generatedFile, ch, shellImportChan) + writersWaitGroup.Done() + }() + } + // submit all extracted resources... 
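+ // every submission bumps the shared wait group; processSingleResource calls Done once the resource is generated (or skipped)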
+ for i, r := range resources { + ic.waitGroup.Add(1) + resourcesChan <- r + if i%500 == 0 { + log.Printf("[INFO] Submitted %d of %d resources", i+1, scopeSize) + } + } + ic.waitGroup.Wait() + // close all channels + close(shellImportChan) + close(nativeImportChan) + close(resourcesChan) + for service, ch := range resourceWriters { + log.Printf("Closing writer for service %s", service) + close(ch) + } + writersWaitGroup.Wait() + + log.Printf("[INFO] Finished generation of configuration for %d resources (took %v seconds)", + scopeSize, time.Since(t1).Seconds()) +} + +func (ic *importContext) processSingleResource(resourcesChan resourceChannel, + writerChannels map[string]dataWriteChannel, nativeImportChannel importWriteChannel) { + processed := 0 + generated := 0 + ignored := 0 + for r := range resourcesChan { + processed = processed + 1 + if r == nil { + log.Print("[WARN] Got nil resource...") + ic.waitGroup.Done() + continue + } + ir := ic.Importables[r.Resource] + if ir.Ignore != nil && ir.Ignore(ic, r) { + log.Printf("[WARN] Ignoring resource %s: %s", r.Resource, r.Name) + ignored = ignored + 1 + ic.waitGroup.Done() + continue + } + var err error + f := hclwrite.NewEmptyFile() + log.Printf("[TRACE] Generating %s: %s", r.Resource, r.Name) + body := f.Body() + if ir.Body != nil { + err = ir.Body(ic, body, r) + if err != nil { + log.Printf("[ERROR] error calling ir.Body for %v: %s", r, err.Error()) + } + } else { + resourceBlock := body.AppendNewBlock("resource", []string{r.Resource, r.Name}) + err = ic.dataToHcl(ir, []string{}, ic.Resources[r.Resource], r, resourceBlock.Body()) + if err != nil { + log.Printf("[ERROR] error generating body for %v: %s", r, err.Error()) + } + } + if err == nil && len(body.Blocks()) > 0 { + formatted := hclwrite.Format(f.Bytes()) + // fix some formatting in a hacky way instead of writing 100 lines of HCL AST writer code + formatted = []byte(ic.regexFix(string(formatted), ic.hclFixes)) + writeData := &resourceWriteData{ + ResourceBody: string(formatted), + BlockName: generateBlockFullName(body.Blocks()[0]), + } + if r.Mode != "data" && ic.Resources[r.Resource].Importer != nil { + writeData.ImportCommand = r.ImportCommand(ic) + if ic.nativeImportSupported { // generate import block for native import + imp := hclwrite.NewEmptyFile() + imoBlock := imp.Body().AppendNewBlock("import", []string{}) + imoBlock.Body().SetAttributeValue("id", cty.StringVal(r.ID)) + traversal := hcl.Traversal{ + hcl.TraverseRoot{Name: r.Resource}, + hcl.TraverseAttr{Name: r.Name}, + } + tokens := hclwrite.TokensForTraversal(traversal) + imoBlock.Body().SetAttributeRaw("to", tokens) + formattedImp := hclwrite.Format(imp.Bytes()) + //log.Printf("[DEBUG] Import block for %s: %s", r.ID, string(formattedImp)) + ic.waitGroup.Add(1) + nativeImportChannel <- string(formattedImp) + } + } + ch, exists := writerChannels[ir.Service] + if exists { + ic.waitGroup.Add(1) + ch <- writeData + } else { + log.Printf("[WARN] can't find a channel for service: %s, resource: %s", ir.Service, r.Resource) + } + log.Printf("[TRACE] Finished generating %s: %s", r.Resource, r.Name) + generated = generated + 1 + } else { + log.Printf("[WARN] error generating resource body: %v, or body blocks len is 0", err) + } + ic.waitGroup.Done() + } + log.Printf("[DEBUG] processed resources: %d, generated: %d, ignored: %d", processed, generated, ignored) +} + +func extractResourceIdFromImportBlock(block *hclwrite.Block) string { + if block.Type() != "import" { + log.Print("[WARN] it's not an import block!") + return "" + } + 
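+ // the `to` attribute of an import block holds the resource address, which serves as the block's identity when merging incremental runs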
idAttr := block.Body().GetAttribute("to") + if idAttr == nil { + log.Printf("[WARN] Can't find `to` attribute in the import block") + return "" + } + idVal := string(idAttr.Expr().BuildTokens(nil).Bytes()) + return strings.TrimSpace(idVal) +} + +func extractResourceIdFromImportBlockString(importBlock string) string { + block, diags := hclwrite.ParseConfig([]byte(importBlock), "test.tf", hcl.Pos{Line: 1, Column: 1}) + if diags.HasErrors() { + log.Printf("[WARN] parsing of import block %s has failed: %s", importBlock, diags.Error()) + return "" + } + if len(block.Body().Blocks()) == 0 { + log.Printf("[WARN] import block %s has 0 blocks!", importBlock) + return "" + } + return extractResourceIdFromImportBlock(block.Body().Blocks()[0]) +} + +func (ic *importContext) writeNativeImports(importChan importWriteChannel) { + if !ic.nativeImportSupported { + log.Print("[DEBUG] Native import is not enabled, skipping...") + return + } + importsFileName := fmt.Sprintf("%s/import.tf", ic.Directory) + // TODO: in incremental mode read existing file with imports and append them for not processed & not deleted resources + var existingFile *hclwrite.File + if ic.incremental { + log.Printf("[DEBUG] Going to read existing file %s", importsFileName) + content, err := os.ReadFile(importsFileName) + if errors.Is(err, os.ErrNotExist) { + log.Printf("[WARN] File %s doesn't exist when using incremental export", importsFileName) + } else if err != nil { + log.Printf("[ERROR] error opening %s", importsFileName) + } else { + log.Printf("[DEBUG] Going to parse existing file %s", importsFileName) + var diags hcl.Diagnostics + existingFile, diags = hclwrite.ParseConfig(content, importsFileName, hcl.Pos{Line: 1, Column: 1}) + if diags.HasErrors() { + log.Printf("[ERROR] parsing of existing file %s failed: %s", importsFileName, diags.Error()) + } else { + log.Printf("[DEBUG] There are %d objects in existing file %s", + len(existingFile.Body().Blocks()), importsFileName) + } + } + } + if existingFile == nil { + existingFile = hclwrite.NewEmptyFile() + } + + // do actual writes + importsFile, err := os.Create(importsFileName) + if err != nil { + log.Printf("[ERROR] Can't create %s: %v", importsFileName, err) + return + } + defer importsFile.Close() + + newImports := make(map[string]struct{}, 100) + log.Printf("[DEBUG] started processing new writes for %s", importsFileName) + // write native imports + for importBlock := range importChan { + if importBlock != "" { + log.Printf("[TRACE] writing import command %s", importBlock) + importsFile.WriteString(importBlock) + id := extractResourceIdFromImportBlockString(importBlock) + if id != "" { + newImports[id] = struct{}{} + } + } else { + log.Print("[WARN] got empty import command...") + } + ic.waitGroup.Done() + } + // write the rest of import blocks + numResources := len(newImports) + log.Printf("[DEBUG] finished processing new writes for %s. 
Wrote %d resources", importsFileName, numResources) + // update existing file if incremental mode + if ic.incremental { + log.Printf("[DEBUG] Starting to merge existing resources for %s", importsFileName) + f := hclwrite.NewEmptyFile() + for _, block := range existingFile.Body().Blocks() { + blockName := extractResourceIdFromImportBlock(block) + if blockName == "" { + log.Printf("[WARN] can't extract resource ID from import block: %s", + string(block.BuildTokens(nil).Bytes())) + continue + } + _, exists := newImports[blockName] + _, deleted := ic.deletedResources[blockName] + if exists { + log.Printf("[DEBUG] resource %s already generated, skipping...", blockName) + } else if deleted { + log.Printf("[DEBUG] resource %s is deleted, skipping...", blockName) + } else { + log.Printf("[DEBUG] resource %s doesn't exist, adding...", blockName) + f.Body().AppendBlock(block) + numResources = numResources + 1 + } + } + _, err = importsFile.WriteString(string(f.Bytes())) + if err != nil { + log.Printf("[ERROR] error when writing existing resources for file %s: %v", importsFileName, err) + } + log.Printf("[DEBUG] Finished merging existing resources for %s", importsFileName) + } +} + +func (ic *importContext) writeShellImports(sh *os.File, importChan importWriteChannel) { + for importCommand := range importChan { + if importCommand != "" && sh != nil { + log.Printf("[DEBUG] writing import command %s", importCommand) + sh.WriteString(importCommand + "\n") + delete(ic.shImports, importCommand) + } else { + log.Print("[WARN] got empty import command... or file is nil") + } + ic.waitGroup.Done() + } + if sh != nil { + log.Printf("[DEBUG] Writing the rest of import commands. len=%d", len(ic.shImports)) + for k := range ic.shImports { + parts := strings.Split(k, " ") + if len(parts) > 3 { + resource := parts[2] + _, deleted := ic.deletedResources[resource] + if deleted { + log.Printf("[DEBUG] Resource %s is deleted. Skipping import command for it", resource) + continue + } + } + sh.WriteString(k + "\n") + } + } +} + +func generateResourceName(rtype, rname string) string { + return rtype + "." 
+ rname +} + +func generateBlockFullName(block *hclwrite.Block) string { + labels := block.Labels() + return generateResourceName(labels[0], strings.Join(labels[1:], "_")) +} + +type resourceWriteData struct { + BlockName string + ResourceBody string + ImportCommand string +} + +type dataWriteChannel chan *resourceWriteData +type importWriteChannel chan string + +func (ic *importContext) handleResourceWrite(generatedFile string, ch dataWriteChannel, importChan importWriteChannel) { + var existingFile *hclwrite.File + if ic.incremental { + log.Printf("[DEBUG] Going to read existing file %s", generatedFile) + content, err := os.ReadFile(generatedFile) + if errors.Is(err, os.ErrNotExist) { + log.Printf("[WARN] File %s doesn't exist when using incremental export", generatedFile) + } else if err != nil { + log.Printf("[ERROR] error opening %s", generatedFile) + } else { + log.Printf("[DEBUG] Going to parse existing file %s", generatedFile) + var diags hcl.Diagnostics + existingFile, diags = hclwrite.ParseConfig(content, generatedFile, hcl.Pos{Line: 1, Column: 1}) + if diags.HasErrors() { + log.Printf("[ERROR] parsing of existing file %s failed: %s", generatedFile, diags.Error()) + } else { + log.Printf("[DEBUG] There are %d objects in existing file %s", + len(existingFile.Body().Blocks()), generatedFile) + } + } + } + if existingFile == nil { + existingFile = hclwrite.NewEmptyFile() + } + + tf, err := os.Create(generatedFile) + if err != nil { + log.Printf("[ERROR] Can't create %s: %v", generatedFile, err) + return + } + + // + newResources := make(map[string]struct{}, 100) + log.Printf("[DEBUG] started processing new writes for %s", generatedFile) + for f := range ch { + if f != nil { + log.Printf("[DEBUG] started writing resource body for %s", f.BlockName) + _, err = tf.WriteString(f.ResourceBody) + if err == nil { + newResources[f.BlockName] = struct{}{} + if f.ImportCommand != "" { + ic.waitGroup.Add(1) + importChan <- f.ImportCommand + } + log.Printf("[DEBUG] finished writing resource body for %s", f.BlockName) + } else { + log.Printf("[ERROR] Error when writing to %s: %v", generatedFile, err) + } + } else { + log.Print("[WARN] got nil as resourceWriteData!") + } + ic.waitGroup.Done() + } + numResources := len(newResources) + log.Printf("[DEBUG] finished processing new writes for %s. 
Wrote %d resources", generatedFile, numResources) + // update existing file if incremental mode + if ic.incremental { + log.Printf("[DEBUG] Starting to merge existing resources for %s", generatedFile) + f := hclwrite.NewEmptyFile() + for _, block := range existingFile.Body().Blocks() { + blockName := generateBlockFullName(block) + _, exists := newResources[blockName] + _, deleted := ic.deletedResources[blockName] + if exists { + log.Printf("[DEBUG] resource %s already generated, skipping...", blockName) + } else if deleted { + log.Printf("[DEBUG] resource %s is deleted, skipping...", blockName) + } else { + log.Printf("[DEBUG] resource %s doesn't exist, adding...", blockName) + f.Body().AppendBlock(block) + numResources = numResources + 1 + } + } + _, err = tf.WriteString(string(f.Bytes())) + if err != nil { + log.Printf("[ERROR] error when writing existing resources for file %s: %v", generatedFile, err) + } + log.Printf("[DEBUG] Finished merging existing resources for %s", generatedFile) + } + tf.Close() + if numResources == 0 { + log.Printf("[DEBUG] removing empty file %s - no resources for a given service", generatedFile) + os.Remove(generatedFile) + } +} + +func (ic *importContext) generateResourceIdForWorkspaceObject(obj workspace.ObjectStatus) (string, string) { + var rtype string + switch obj.ObjectType { + case workspace.Directory: + rtype = "databricks_directory" + case workspace.File: + rtype = "databricks_workspace_file" + case workspace.Notebook: + rtype = "databricks_notebook" + default: + log.Printf("[WARN] Unsupported WS object type: %s in obj %v", obj.ObjectType, obj) + return "", "" + } + rData := ic.Resources[rtype].Data( + &terraform.InstanceState{ + ID: obj.Path, + Attributes: map[string]string{}, + }) + rData.Set("object_id", obj.ObjectID) + rData.Set("path", obj.Path) + name := ic.ResourceName(&resource{ + ID: obj.Path, + Resource: rtype, + Data: rData, + }) + return generateResourceName(rtype, name), rtype +} + +func (ic *importContext) loadOldWorkspaceObjects(fileName string) { + ic.oldWorkspaceObjects = []workspace.ObjectStatus{} + // Read a list of resources from previous run + oldDataFile, err := os.ReadFile(fileName) + if err != nil { + log.Printf("[WARN] Can't open the file (%s) with previous list of workspace objects: %s", fileName, err.Error()) + return + } + err = json.Unmarshal(oldDataFile, &ic.oldWorkspaceObjects) + if err != nil { + log.Printf("[WARN] Can't desereialize previous list of workspace objects: %s", err.Error()) + return + } + log.Printf("[DEBUG] Read previous list of workspace objects. 
got %d objects", len(ic.oldWorkspaceObjects)) + for _, obj := range ic.oldWorkspaceObjects { + ic.oldWorkspaceObjectMapping[obj.ObjectID] = obj.Path + } +} + +func (ic *importContext) findDeletedResources() { + log.Print("[INFO] Starting detection of deleted workspace objects") + if !ic.incremental || len(ic.allWorkspaceObjects) == 0 { + return + } + if len(ic.oldWorkspaceObjects) == 0 { + log.Print("[INFO] Previous list of workspace objects is empty") + return + } + // generate IDs of current objects + currentObjs := map[string]struct{}{} + for _, obj := range ic.allWorkspaceObjects { + obj := obj + if !isSupportedWorkspaceObject(obj) { + continue + } + rid, _ := ic.generateResourceIdForWorkspaceObject(obj) + currentObjs[rid] = struct{}{} + } + // Loop through previous objects, and if it's missing from the current list, add it to deleted, including permission + for _, obj := range ic.oldWorkspaceObjects { + obj := obj + if !isSupportedWorkspaceObject(obj) { + continue + } + rid, rtype := ic.generateResourceIdForWorkspaceObject(obj) + _, exists := currentObjs[rid] + if exists { + log.Printf("[DEBUG] object %s still exists", rid) // change to TRACE? + continue + } + log.Printf("[DEBUG] object %s is deleted!", rid) + ic.deletedResources[rid] = struct{}{} + // convert into permissions. This is quite fragile right now, need to think how to handle it better + var permId string + switch rtype { + case "databricks_notebook": + permId = "databricks_permissions.notebook_" + rid[len(rtype)+1:] + case "databricks_directory": + permId = "databricks_permissions.directory_" + rid[len(rtype)+1:] + case "databricks_workspace_file": + permId = "databricks_permissions.ws_file_" + rid[len(rtype)+1:] + } + log.Printf("[DEBUG] deleted permissions object %s", permId) + if permId != "" { + ic.deletedResources[permId] = struct{}{} + } + } + log.Printf("[INFO] Finished detection of deleted workspace objects. Detected %d deleted objects.", + len(ic.deletedResources)) + log.Printf("[DEBUG] Deleted objects. %v", ic.deletedResources) // change to TRACE? 
+} diff --git a/exporter/context.go b/exporter/context.go index 441ed12078..122f43ad87 100644 --- a/exporter/context.go +++ b/exporter/context.go @@ -5,15 +5,12 @@ import ( "context" "crypto/md5" "encoding/json" - "errors" "fmt" "log" "os" "os/exec" - "reflect" "regexp" "sort" - "strconv" "strings" "sync" "time" @@ -29,12 +26,7 @@ import ( "github.com/databricks/terraform-provider-databricks/scim" "github.com/databricks/terraform-provider-databricks/workspace" - "github.com/hashicorp/hcl/v2" - "github.com/hashicorp/hcl/v2/hclsyntax" - "github.com/hashicorp/hcl/v2/hclwrite" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" - "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" - "github.com/zclconf/go-cty/cty" ) /** High level overview of importer design: @@ -539,114 +531,6 @@ func (ic *importContext) Run() error { return nil } -func isSupportedWsObject(obj workspace.ObjectStatus) bool { - switch obj.ObjectType { - case workspace.Directory, workspace.Notebook, workspace.File: - return true - } - return false -} - -func (ic *importContext) generateResourceIdForWsObject(obj workspace.ObjectStatus) (string, string) { - var rtype string - switch obj.ObjectType { - case workspace.Directory: - rtype = "databricks_directory" - case workspace.File: - rtype = "databricks_workspace_file" - case workspace.Notebook: - rtype = "databricks_notebook" - default: - log.Printf("[WARN] Unsupported WS object type: %s in obj %v", obj.ObjectType, obj) - return "", "" - } - rData := ic.Resources[rtype].Data( - &terraform.InstanceState{ - ID: obj.Path, - Attributes: map[string]string{}, - }) - rData.Set("object_id", obj.ObjectID) - rData.Set("path", obj.Path) - name := ic.ResourceName(&resource{ - ID: obj.Path, - Resource: rtype, - Data: rData, - }) - return generateResourceName(rtype, name), rtype -} - -func (ic *importContext) loadOldWorkspaceObjects(fileName string) { - ic.oldWorkspaceObjects = []workspace.ObjectStatus{} - // Read a list of resources from previous run - oldDataFile, err := os.ReadFile(fileName) - if err != nil { - log.Printf("[WARN] Can't open the file (%s) with previous list of workspace objects: %s", fileName, err.Error()) - return - } - err = json.Unmarshal(oldDataFile, &ic.oldWorkspaceObjects) - if err != nil { - log.Printf("[WARN] Can't desereialize previous list of workspace objects: %s", err.Error()) - return - } - log.Printf("[DEBUG] Read previous list of workspace objects. 
got %d objects", len(ic.oldWorkspaceObjects)) - for _, obj := range ic.oldWorkspaceObjects { - ic.oldWorkspaceObjectMapping[obj.ObjectID] = obj.Path - } -} - -func (ic *importContext) findDeletedResources() { - log.Print("[INFO] Starting detection of deleted workspace objects") - if !ic.incremental || len(ic.allWorkspaceObjects) == 0 { - return - } - if len(ic.oldWorkspaceObjects) == 0 { - log.Print("[INFO] Previous list of workspace objects is empty") - return - } - // generate IDs of current objects - currentObjs := map[string]struct{}{} - for _, obj := range ic.allWorkspaceObjects { - obj := obj - if !isSupportedWsObject(obj) { - continue - } - rid, _ := ic.generateResourceIdForWsObject(obj) - currentObjs[rid] = struct{}{} - } - // Loop through previous objects, and if it's missing from the current list, add it to deleted, including permission - for _, obj := range ic.oldWorkspaceObjects { - obj := obj - if !isSupportedWsObject(obj) { - continue - } - rid, rtype := ic.generateResourceIdForWsObject(obj) - _, exists := currentObjs[rid] - if exists { - log.Printf("[DEBUG] object %s still exists", rid) // change to TRACE? - continue - } - log.Printf("[DEBUG] object %s is deleted!", rid) - ic.deletedResources[rid] = struct{}{} - // convert into permissions. This is quite fragile right now, need to think how to handle it better - var permId string - switch rtype { - case "databricks_notebook": - permId = "databricks_permissions.notebook_" + rid[len(rtype)+1:] - case "databricks_directory": - permId = "databricks_permissions.directory_" + rid[len(rtype)+1:] - case "databricks_workspace_file": - permId = "databricks_permissions.ws_file_" + rid[len(rtype)+1:] - } - log.Printf("[DEBUG] deleted permissions object %s", permId) - if permId != "" { - ic.deletedResources[permId] = struct{}{} - } - } - log.Printf("[INFO] Finished detection of deleted workspace objects. Detected %d deleted objects.", - len(ic.deletedResources)) - log.Printf("[DEBUG] Deleted objects. %v", ic.deletedResources) // change to TRACE? -} - func (ic *importContext) resourceHandler(num int, resourceType string, ch resourceChannel) { log.Printf("[DEBUG] Starting goroutine %d for resource %s", num, resourceType) for r := range ch { @@ -693,638 +577,6 @@ func (ic *importContext) closeImportChannels() { close(ic.defaultChannel) } -func generateResourceName(rtype, rname string) string { - return rtype + "." 
+ rname -} - -func generateBlockFullName(block *hclwrite.Block) string { - labels := block.Labels() - return generateResourceName(labels[0], strings.Join(labels[1:], "_")) -} - -type resourceWriteData struct { - BlockName string - ResourceBody string - ImportCommand string -} - -type dataWriteChannel chan *resourceWriteData -type importWriteChannel chan string - -func (ic *importContext) handleResourceWrite(generatedFile string, ch dataWriteChannel, importChan importWriteChannel) { - var existingFile *hclwrite.File - if ic.incremental { - log.Printf("[DEBUG] Going to read existing file %s", generatedFile) - content, err := os.ReadFile(generatedFile) - if errors.Is(err, os.ErrNotExist) { - log.Printf("[WARN] File %s doesn't exist when using incremental export", generatedFile) - } else if err != nil { - log.Printf("[ERROR] error opening %s", generatedFile) - } else { - log.Printf("[DEBUG] Going to parse existing file %s", generatedFile) - var diags hcl.Diagnostics - existingFile, diags = hclwrite.ParseConfig(content, generatedFile, hcl.Pos{Line: 1, Column: 1}) - if diags.HasErrors() { - log.Printf("[ERROR] parsing of existing file %s failed: %s", generatedFile, diags.Error()) - } else { - log.Printf("[DEBUG] There are %d objects in existing file %s", - len(existingFile.Body().Blocks()), generatedFile) - } - } - } - if existingFile == nil { - existingFile = hclwrite.NewEmptyFile() - } - - tf, err := os.Create(generatedFile) - if err != nil { - log.Printf("[ERROR] Can't create %s: %v", generatedFile, err) - return - } - - // - newResources := make(map[string]struct{}, 100) - log.Printf("[DEBUG] started processing new writes for %s", generatedFile) - for f := range ch { - if f != nil { - log.Printf("[DEBUG] started writing resource body for %s", f.BlockName) - _, err = tf.WriteString(f.ResourceBody) - if err == nil { - newResources[f.BlockName] = struct{}{} - if f.ImportCommand != "" { - ic.waitGroup.Add(1) - importChan <- f.ImportCommand - } - log.Printf("[DEBUG] finished writing resource body for %s", f.BlockName) - } else { - log.Printf("[ERROR] Error when writing to %s: %v", generatedFile, err) - } - } else { - log.Print("[WARN] got nil as resourceWriteData!") - } - ic.waitGroup.Done() - } - numResources := len(newResources) - log.Printf("[DEBUG] finished processing new writes for %s. 
Wrote %d resources", generatedFile, numResources) - // update existing file if incremental mode - if ic.incremental { - log.Printf("[DEBUG] Starting to merge existing resources for %s", generatedFile) - f := hclwrite.NewEmptyFile() - for _, block := range existingFile.Body().Blocks() { - blockName := generateBlockFullName(block) - _, exists := newResources[blockName] - _, deleted := ic.deletedResources[blockName] - if exists { - log.Printf("[DEBUG] resource %s already generated, skipping...", blockName) - } else if deleted { - log.Printf("[DEBUG] resource %s is deleted, skipping...", blockName) - } else { - log.Printf("[DEBUG] resource %s doesn't exist, adding...", blockName) - f.Body().AppendBlock(block) - numResources = numResources + 1 - } - } - _, err = tf.WriteString(string(f.Bytes())) - if err != nil { - log.Printf("[ERROR] error when writing existing resources for file %s: %v", generatedFile, err) - } - log.Printf("[DEBUG] Finished merging existing resources for %s", generatedFile) - } - tf.Close() - if numResources == 0 { - log.Printf("[DEBUG] removing empty file %s - no resources for a given service", generatedFile) - os.Remove(generatedFile) - } -} - -func (ic *importContext) writeShellImports(sh *os.File, importChan importWriteChannel) { - for importCommand := range importChan { - if importCommand != "" && sh != nil { - log.Printf("[DEBUG] writing import command %s", importCommand) - sh.WriteString(importCommand + "\n") - delete(ic.shImports, importCommand) - } else { - log.Print("[WARN] got empty import command... or file is nil") - } - ic.waitGroup.Done() - } - if sh != nil { - log.Printf("[DEBUG] Writing the rest of import commands. len=%d", len(ic.shImports)) - for k := range ic.shImports { - parts := strings.Split(k, " ") - if len(parts) > 3 { - resource := parts[2] - _, deleted := ic.deletedResources[resource] - if deleted { - log.Printf("[DEBUG] Resource %s is deleted. 
Skipping import command for it", resource) - continue - } - } - sh.WriteString(k + "\n") - } - } -} - -func extractResourceIdFromImportBlock(block *hclwrite.Block) string { - if block.Type() != "import" { - log.Print("[WARN] it's not an import block!") - return "" - } - idAttr := block.Body().GetAttribute("to") - if idAttr == nil { - log.Printf("[WARN] Can't find `to` attribute in the import block") - return "" - } - idVal := string(idAttr.Expr().BuildTokens(nil).Bytes()) - return strings.TrimSpace(idVal) -} - -func extractResourceIdFromImportBlockString(importBlock string) string { - block, diags := hclwrite.ParseConfig([]byte(importBlock), "test.tf", hcl.Pos{Line: 1, Column: 1}) - if diags.HasErrors() { - log.Printf("[WARN] parsing of import block %s has failed: %s", importBlock, diags.Error()) - return "" - } - if len(block.Body().Blocks()) == 0 { - log.Printf("[WARN] import block %s has 0 blocks!", importBlock) - return "" - } - return extractResourceIdFromImportBlock(block.Body().Blocks()[0]) -} - -func (ic *importContext) writeNativeImports(importChan importWriteChannel) { - if !ic.nativeImportSupported { - log.Print("[DEBUG] Native import is not enabled, skipping...") - return - } - importsFileName := fmt.Sprintf("%s/import.tf", ic.Directory) - // TODO: in incremental mode read existing file with imports and append them for not processed & not deleted resources - var existingFile *hclwrite.File - if ic.incremental { - log.Printf("[DEBUG] Going to read existing file %s", importsFileName) - content, err := os.ReadFile(importsFileName) - if errors.Is(err, os.ErrNotExist) { - log.Printf("[WARN] File %s doesn't exist when using incremental export", importsFileName) - } else if err != nil { - log.Printf("[ERROR] error opening %s", importsFileName) - } else { - log.Printf("[DEBUG] Going to parse existing file %s", importsFileName) - var diags hcl.Diagnostics - existingFile, diags = hclwrite.ParseConfig(content, importsFileName, hcl.Pos{Line: 1, Column: 1}) - if diags.HasErrors() { - log.Printf("[ERROR] parsing of existing file %s failed: %s", importsFileName, diags.Error()) - } else { - log.Printf("[DEBUG] There are %d objects in existing file %s", - len(existingFile.Body().Blocks()), importsFileName) - } - } - } - if existingFile == nil { - existingFile = hclwrite.NewEmptyFile() - } - - // do actual writes - importsFile, err := os.Create(importsFileName) - if err != nil { - log.Printf("[ERROR] Can't create %s: %v", importsFileName, err) - return - } - defer importsFile.Close() - - newImports := make(map[string]struct{}, 100) - log.Printf("[DEBUG] started processing new writes for %s", importsFileName) - // write native imports - for importBlock := range importChan { - if importBlock != "" { - log.Printf("[TRACE] writing import command %s", importBlock) - importsFile.WriteString(importBlock) - id := extractResourceIdFromImportBlockString(importBlock) - if id != "" { - newImports[id] = struct{}{} - } - } else { - log.Print("[WARN] got empty import command...") - } - ic.waitGroup.Done() - } - // write the rest of import blocks - numResources := len(newImports) - log.Printf("[DEBUG] finished processing new writes for %s. 
Wrote %d resources", importsFileName, numResources) - // update existing file if incremental mode - if ic.incremental { - log.Printf("[DEBUG] Starting to merge existing resources for %s", importsFileName) - f := hclwrite.NewEmptyFile() - for _, block := range existingFile.Body().Blocks() { - blockName := extractResourceIdFromImportBlock(block) - if blockName == "" { - log.Printf("[WARN] can't extract resource ID from import block: %s", - string(block.BuildTokens(nil).Bytes())) - continue - } - _, exists := newImports[blockName] - _, deleted := ic.deletedResources[blockName] - if exists { - log.Printf("[DEBUG] resource %s already generated, skipping...", blockName) - } else if deleted { - log.Printf("[DEBUG] resource %s is deleted, skipping...", blockName) - } else { - log.Printf("[DEBUG] resource %s doesn't exist, adding...", blockName) - f.Body().AppendBlock(block) - numResources = numResources + 1 - } - } - _, err = importsFile.WriteString(string(f.Bytes())) - if err != nil { - log.Printf("[ERROR] error when writing existing resources for file %s: %v", importsFileName, err) - } - log.Printf("[DEBUG] Finished merging existing resources for %s", importsFileName) - } - -} - -func (ic *importContext) processSingleResource(resourcesChan resourceChannel, - writerChannels map[string]dataWriteChannel, nativeImportChannel importWriteChannel) { - processed := 0 - generated := 0 - ignored := 0 - for r := range resourcesChan { - processed = processed + 1 - if r == nil { - log.Print("[WARN] Got nil resource...") - ic.waitGroup.Done() - continue - } - ir := ic.Importables[r.Resource] - if ir.Ignore != nil && ir.Ignore(ic, r) { - log.Printf("[WARN] Ignoring resource %s: %s", r.Resource, r.Name) - ignored = ignored + 1 - ic.waitGroup.Done() - continue - } - var err error - f := hclwrite.NewEmptyFile() - log.Printf("[TRACE] Generating %s: %s", r.Resource, r.Name) - body := f.Body() - if ir.Body != nil { - err = ir.Body(ic, body, r) - if err != nil { - log.Printf("[ERROR] error calling ir.Body for %v: %s", r, err.Error()) - } - } else { - resourceBlock := body.AppendNewBlock("resource", []string{r.Resource, r.Name}) - err = ic.dataToHcl(ir, []string{}, ic.Resources[r.Resource], r, resourceBlock.Body()) - if err != nil { - log.Printf("[ERROR] error generating body for %v: %s", r, err.Error()) - } - } - if err == nil && len(body.Blocks()) > 0 { - formatted := hclwrite.Format(f.Bytes()) - // fix some formatting in a hacky way instead of writing 100 lines of HCL AST writer code - formatted = []byte(ic.regexFix(string(formatted), ic.hclFixes)) - writeData := &resourceWriteData{ - ResourceBody: string(formatted), - BlockName: generateBlockFullName(body.Blocks()[0]), - } - if r.Mode != "data" && ic.Resources[r.Resource].Importer != nil { - writeData.ImportCommand = r.ImportCommand(ic) - if ic.nativeImportSupported { // generate import block for native import - imp := hclwrite.NewEmptyFile() - imoBlock := imp.Body().AppendNewBlock("import", []string{}) - imoBlock.Body().SetAttributeValue("id", cty.StringVal(r.ID)) - traversal := hcl.Traversal{ - hcl.TraverseRoot{Name: r.Resource}, - hcl.TraverseAttr{Name: r.Name}, - } - tokens := hclwrite.TokensForTraversal(traversal) - imoBlock.Body().SetAttributeRaw("to", tokens) - formattedImp := hclwrite.Format(imp.Bytes()) - //log.Printf("[DEBUG] Import block for %s: %s", r.ID, string(formattedImp)) - ic.waitGroup.Add(1) - nativeImportChannel <- string(formattedImp) - } - } - ch, exists := writerChannels[ir.Service] - if exists { - ic.waitGroup.Add(1) - ch <- writeData - } 
else { - log.Printf("[WARN] can't find a channel for service: %s, resource: %s", ir.Service, r.Resource) - } - log.Printf("[TRACE] Finished generating %s: %s", r.Resource, r.Name) - generated = generated + 1 - } else { - log.Printf("[WARN] error generating resource body: %v, or body blocks len is 0", err) - } - ic.waitGroup.Done() - } - log.Printf("[DEBUG] processed resources: %d, generated: %d, ignored: %d", processed, generated, ignored) -} - -func (ic *importContext) generateAndWriteResources(sh *os.File) { - resources := ic.Scope.Sorted() - scopeSize := ic.Scope.Len() - t1 := time.Now() - log.Printf("[INFO] Generating configuration for %d resources", scopeSize) - - // make configurable via environment variables - resourceHandlersNumber := getEnvAsInt("EXPORTER_RESOURCE_GENERATORS", 50) - resourcesChan := make(resourceChannel, defaultChannelSize) - - resourceWriters := make(map[string]dataWriteChannel, len(ic.Resources)) - for service := range ic.services { - resourceWriters[service] = make(dataWriteChannel, defaultChannelSize) - } - writersWaitGroup := &sync.WaitGroup{} - // write shell script for importing - shellImportChan := make(importWriteChannel, defaultChannelSize) - writersWaitGroup.Add(1) - go func() { - ic.writeShellImports(sh, shellImportChan) - writersWaitGroup.Done() - }() - // - nativeImportChan := make(importWriteChannel, defaultChannelSize) - writersWaitGroup.Add(1) - go func() { - ic.writeNativeImports(nativeImportChan) - writersWaitGroup.Done() - }() - // start resource handlers - for i := 0; i < resourceHandlersNumber; i++ { - i := i - go func() { - log.Printf("[DEBUG] Starting resource handler %d", i) - ic.processSingleResource(resourcesChan, resourceWriters, nativeImportChan) - }() - } - // start writers for specific services - for service, ch := range resourceWriters { - service := service - ch := ch - generatedFile := fmt.Sprintf("%s/%s.tf", ic.Directory, service) - log.Printf("[DEBUG] starting writer for service %s", service) - writersWaitGroup.Add(1) - go func() { - ic.handleResourceWrite(generatedFile, ch, shellImportChan) - writersWaitGroup.Done() - }() - } - // submit all extracted resources... - for i, r := range resources { - ic.waitGroup.Add(1) - resourcesChan <- r - if i%500 == 0 { - log.Printf("[INFO] Submitted %d of %d resources", i+1, scopeSize) - } - } - ic.waitGroup.Wait() - // close all channels - close(shellImportChan) - close(nativeImportChan) - close(resourcesChan) - for service, ch := range resourceWriters { - log.Printf("Closing writer for service %s", service) - close(ch) - } - writersWaitGroup.Wait() - - log.Printf("[INFO] Finished generation of configuration for %d resources (took %v seconds)", - scopeSize, time.Since(t1).Seconds()) -} - -func (ic *importContext) generateGitIgnore() { - fileName := fmt.Sprintf("%s/.gitignore", ic.Directory) - vf, err := os.Create(fileName) - if err != nil { - log.Printf("[ERROR] can't create %s: %v", fileName, err) - return - } - defer vf.Close() - // nolint - vf.Write([]byte("terraform.tfvars\n")) -} - -func (ic *importContext) generateTfvars() error { - // TODO: make it incremental as well... 
- if len(ic.tfvars) == 0 { - return nil - } - f := hclwrite.NewEmptyFile() - body := f.Body() - fileName := fmt.Sprintf("%s/terraform.tfvars", ic.Directory) - - vf, err := os.Create(fileName) - if err != nil { - return err - } - defer vf.Close() - - for k, v := range ic.tfvars { - body.SetAttributeValue(k, cty.StringVal(v)) - } - // nolint - vf.Write(f.Bytes()) - log.Printf("[INFO] Written %d tfvars", len(ic.tfvars)) - - ic.generateGitIgnore() - - return nil -} - -func (ic *importContext) generateVariables() error { - if len(ic.variables) == 0 { - return nil - } - f := hclwrite.NewEmptyFile() - body := f.Body() - fileName := fmt.Sprintf("%s/vars.tf", ic.Directory) - if ic.incremental { - content, err := os.ReadFile(fileName) - if err == nil { - ftmp, diags := hclwrite.ParseConfig(content, fileName, hcl.Pos{Line: 1, Column: 1}) - if diags.HasErrors() { - log.Printf("[ERROR] parsing of existing file failed: %s", diags) - } else { - tbody := ftmp.Body() - for _, block := range tbody.Blocks() { - typ := block.Type() - labels := block.Labels() - log.Printf("[DEBUG] blockBody: %v %v\n", typ, labels) - _, present := ic.variables[labels[0]] - if typ == "variable" && present { - log.Printf("[DEBUG] Ignoring variable '%s' that will be re-exported", labels[0]) - } else { - log.Printf("[DEBUG] Adding not exported object. type='%s', labels=%v", typ, labels) - body.AppendBlock(block) - } - } - } - } else { - log.Printf("[ERROR] opening file %s", fileName) - } - } - vf, err := os.Create(fileName) - if err != nil { - return err - } - defer vf.Close() - - for k, v := range ic.variables { - b := body.AppendNewBlock("variable", []string{k}).Body() - b.SetAttributeValue("description", cty.StringVal(v)) - } - // nolint - vf.Write(f.Bytes()) - log.Printf("[INFO] Written %d variables", len(ic.variables)) - return nil -} - -func (ic *importContext) MatchesName(n string) bool { - if ic.match == "" { - return true - } - return strings.Contains(strings.ToLower(n), strings.ToLower(ic.match)) -} - -func genTraversalTokens(sr *resourceApproximation, pick string) hcl.Traversal { - if sr.Mode == "data" { - return hcl.Traversal{ - hcl.TraverseRoot{Name: "data"}, - hcl.TraverseAttr{Name: sr.Type}, - hcl.TraverseAttr{Name: sr.Name}, - hcl.TraverseAttr{Name: pick}, - } - } - return hcl.Traversal{ - hcl.TraverseRoot{Name: sr.Type}, - hcl.TraverseAttr{Name: sr.Name}, - hcl.TraverseAttr{Name: pick}, - } -} - -func (ic *importContext) isIgnoredResourceApproximation(ra *resourceApproximation) bool { - var ignored bool - if ra != nil && ra.Resource != nil { - ignoreFunc := ic.Importables[ra.Type].Ignore - if ignoreFunc != nil && ignoreFunc(ic, ra.Resource) { - log.Printf("[WARN] Found reference to the ignored resource %s: %s", ra.Type, ra.Name) - return true - } - } - return ignored -} - -func (ic *importContext) Find(value, attr string, ref reference, origResource *resource, origPath string) (string, hcl.Traversal, bool) { - log.Printf("[DEBUG] Starting searching for reference for resource %s, attr='%s', value='%s', ref=%v", - ref.Resource, attr, value, ref) - // optimize performance by avoiding doing regexp matching multiple times - matchValue := "" - switch ref.MatchType { - case MatchRegexp: - if ref.Regexp == nil { - log.Printf("[WARN] you must provide regular expression for 'regexp' match type") - return "", nil, false - } - res := ref.Regexp.FindStringSubmatch(value) - if len(res) < 2 { - log.Printf("[WARN] no match for regexp: %v in string %s", ref.Regexp, value) - return "", nil, false - } - matchValue = res[1] - case 
MatchCaseInsensitive: - matchValue = strings.ToLower(value) // performance optimization to avoid doing it in the loop - case MatchExact, MatchDefault: - matchValue = value - case MatchPrefix, MatchLongestPrefix: - if ref.MatchValueTransformFunc != nil { - matchValue = ref.MatchValueTransformFunc(value) - } else { - matchValue = value - } - } - // doing explicit lookup in the state. For case insensitive matches, first attempt to lookup for the value, - // and do iteration if it's not found - if (ref.MatchType == MatchExact || ref.MatchType == MatchDefault || ref.MatchType == MatchRegexp || - ref.MatchType == MatchCaseInsensitive) && !ref.SkipDirectLookup { - sr := ic.State.Get(ref.Resource, attr, matchValue) - if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) && - !ic.isIgnoredResourceApproximation(sr) { - log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", - ref.Resource, attr, value, ref, sr.Type, sr.Name) - return matchValue, genTraversalTokens(sr, attr), sr.Mode == "data" - } - if ref.MatchType != MatchCaseInsensitive { // for case-insensitive matching we'll try iteration - log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Not found", - ref.Resource, attr, value, ref) - return "", nil, false - } - } else if ref.MatchType == MatchLongestPrefix && ref.ExtraLookupKey != "" { - extraKeyValue, exists := origResource.GetExtraData(ref.ExtraLookupKey) - if exists && extraKeyValue.(string) != "" { - sr := ic.State.Get(ref.Resource, attr, extraKeyValue.(string)) - if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) && - !ic.isIgnoredResourceApproximation(sr) { - log.Printf("[DEBUG] Finished direct lookup by key %s for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", - ref.ExtraLookupKey, ref.Resource, attr, value, ref, sr.Type, sr.Name) - return extraKeyValue.(string), genTraversalTokens(sr, attr), sr.Mode == "data" - } - } - } - - maxPrefixLen := 0 - maxPrefixOrigValue := "" - var maxPrefixResource *resourceApproximation - srs := *ic.State.Resources(ref.Resource) - for _, sr := range srs { - for _, i := range sr.Instances { - v := i.Attributes[attr] - if v == nil { - log.Printf("[WARN] Can't find instance attribute '%v' in resource: '%v'", attr, ref.Resource) - continue - } - strValue := v.(string) - origValue := strValue - if ref.SearchValueTransformFunc != nil { - strValue = ref.SearchValueTransformFunc(strValue) - log.Printf("[TRACE] Resource %s. 
Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue) - } - matched := false - switch ref.MatchType { - case MatchCaseInsensitive: - matched = (strings.ToLower(strValue) == matchValue) - case MatchPrefix: - matched = strings.HasPrefix(matchValue, strValue) - case MatchLongestPrefix: - if strings.HasPrefix(matchValue, strValue) && len(origValue) > maxPrefixLen && !ic.isIgnoredResourceApproximation(sr) { - maxPrefixLen = len(origValue) - maxPrefixOrigValue = origValue - maxPrefixResource = sr - } - case MatchExact, MatchDefault: - matched = (strValue == matchValue) - default: - log.Printf("[WARN] Unsupported match type: %s", ref.MatchType) - } - if !matched || (ref.IsValidApproximation != nil && !ref.IsValidApproximation(ic, origResource, sr, origPath)) || - ic.isIgnoredResourceApproximation(sr) { - continue - } - log.Printf("[DEBUG] Finished searching for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", - ref.Resource, attr, value, ref, sr.Type, sr.Name) - return origValue, genTraversalTokens(sr, attr), sr.Mode == "data" - } - } - if ref.MatchType == MatchLongestPrefix && maxPrefixResource != nil && - (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, maxPrefixResource, origPath)) && - !ic.isIgnoredResourceApproximation(maxPrefixResource) { - log.Printf("[DEBUG] Finished searching longest prefix for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", - ref.Resource, attr, value, ref, maxPrefixResource.Type, maxPrefixResource.Name) - return maxPrefixOrigValue, genTraversalTokens(maxPrefixResource, attr), maxPrefixResource.Mode == "data" - } - log.Printf("[DEBUG] Finished searching for reference for resource %s, pick=%s, ref=%v. Not found", ref.Resource, attr, ref) - return "", nil, false -} - // This function checks if resource exist in any state (already added or in process of addition) func (ic *importContext) Has(r *resource) bool { return ic.HasInState(r, false) @@ -1412,16 +664,6 @@ func (ic *importContext) ResourceName(r *resource) string { return name } -func (ic *importContext) isServiceEnabled(service string) bool { - _, exists := ic.services[service] - return exists -} - -func (ic *importContext) isServiceInListing(service string) bool { - _, exists := ic.listing[service] - return exists -} - func (ic *importContext) EmitIfUpdatedAfterMillis(r *resource, modifiedAt int64, message string) { updatedSinceMs := ic.getUpdatedSinceMs() if ic.incremental && modifiedAt < updatedSinceMs { @@ -1500,315 +742,3 @@ func (ic *importContext) Emit(r *resource) { ic.defaultChannel <- r } } - -func maybeAddQuoteCharacter(s string) string { - s = strings.ReplaceAll(s, "\\", "\\\\") - s = strings.ReplaceAll(s, "\"", "\\\"") - return s -} - -func (ic *importContext) getTraversalTokens(ref reference, value string, origResource *resource, origPath string) (hclwrite.Tokens, bool) { - matchType := ref.MatchTypeValue() - attr := ref.MatchAttribute() - attrValue, traversal, isData := ic.Find(value, attr, ref, origResource, origPath) - // at least one invocation of ic.Find will assign Nil to traversal if resource with value is not found - if traversal == nil { - return nil, isData - } - // capture if it's data? 
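`Find` and `genTraversalTokens` cooperate to turn a matched state entry into an HCL traversal, prefixed with `data.` when the approximation is a data source. A hedged sketch of what such a traversal renders to, using the real `hclwrite` API but invented resource names:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclwrite"
)

func main() {
	// resource reference: databricks_instance_pool.pool.id
	ref := hcl.Traversal{
		hcl.TraverseRoot{Name: "databricks_instance_pool"},
		hcl.TraverseAttr{Name: "pool"},
		hcl.TraverseAttr{Name: "id"},
	}
	f := hclwrite.NewEmptyFile()
	f.Body().SetAttributeRaw("instance_pool_id", hclwrite.TokensForTraversal(ref))
	fmt.Print(string(f.Bytes())) // instance_pool_id = databricks_instance_pool.pool.id
}
```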
- switch matchType { - case MatchExact, MatchDefault, MatchCaseInsensitive: - return hclwrite.TokensForTraversal(traversal), isData - case MatchPrefix, MatchLongestPrefix: - rest := value[len(attrValue):] - tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"', '$', '{'}}} - tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}}) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(rest))}) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}) - return tokens, isData - case MatchRegexp: - indices := ref.Regexp.FindStringSubmatchIndex(value) - if len(indices) == 4 { - tokens := hclwrite.Tokens{&hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"'}}} - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[0:indices[2]]))}) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'$', '{'}}) - tokens = append(tokens, hclwrite.TokensForTraversal(traversal)...) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}}) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(maybeAddQuoteCharacter(value[indices[3]:]))}) - tokens = append(tokens, &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}) - return tokens, isData - } - log.Printf("[WARN] Can't match found data in '%s'. Indices: %v", value, indices) - default: - log.Printf("[WARN] Unsupported match type: %s", ref.MatchType) - } - return nil, false -} - -// TODO: move to IC -var dependsRe = regexp.MustCompile(`(\.[\d]+)`) - -func (ic *importContext) generateVariableName(attrName, name string) string { - return fmt.Sprintf("%s_%s", attrName, name) -} - -func (ic *importContext) reference(i importable, path []string, value string, ctyValue cty.Value, origResource *resource) hclwrite.Tokens { - pathString := strings.Join(path, ".") - match := dependsRe.ReplaceAllString(pathString, "") - // get reference candidate, but if it's a `data`, then look for another non-data reference if possible.. 
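The `MatchPrefix`/`MatchLongestPrefix` branch above splices a traversal into a quoted string, keeping the unmatched remainder as a literal. A standalone approximation of that token assembly (the repo and path names are invented):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
	"github.com/hashicorp/hcl/v2/hclwrite"
)

func main() {
	repoPath := hcl.Traversal{
		hcl.TraverseRoot{Name: "databricks_repo"},
		hcl.TraverseAttr{Name: "this"},
		hcl.TraverseAttr{Name: "path"},
	}
	toks := hclwrite.Tokens{
		{Type: hclsyntax.TokenOQuote, Bytes: []byte(`"${`)},
	}
	toks = append(toks, hclwrite.TokensForTraversal(repoPath)...)
	toks = append(toks,
		&hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'}'}},
		&hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte("/notebooks/main.py")},
		&hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}},
	)
	f := hclwrite.NewEmptyFile()
	f.Body().SetAttributeRaw("notebook_path", toks)
	fmt.Print(string(f.Bytes())) // notebook_path = "${databricks_repo.this.path}/notebooks/main.py"
}
```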
- var dataTokens hclwrite.Tokens - for _, d := range i.Depends { - if d.Path != match { - continue - } - if d.File { - relativeFile := fmt.Sprintf("${path.module}/%s", value) - return hclwrite.Tokens{ - &hclwrite.Token{Type: hclsyntax.TokenOQuote, Bytes: []byte{'"'}}, - &hclwrite.Token{Type: hclsyntax.TokenQuotedLit, Bytes: []byte(relativeFile)}, - &hclwrite.Token{Type: hclsyntax.TokenCQuote, Bytes: []byte{'"'}}, - } - } - if d.Variable { - varName := ic.generateVariableName(path[0], value) - return ic.variable(varName, "") - } - - tokens, isData := ic.getTraversalTokens(d, value, origResource, pathString) - if tokens != nil { - if isData { - dataTokens = tokens - log.Printf("[DEBUG] Got reference to data for dependency %v", d) - } else { - return tokens - } - } - } - if len(dataTokens) > 0 { - return dataTokens - } - return hclwrite.TokensForValue(ctyValue) -} - -func (ic *importContext) variable(name, desc string) hclwrite.Tokens { - ic.variablesLock.Lock() - ic.variables[name] = desc - ic.variablesLock.Unlock() - return hclwrite.TokensForTraversal(hcl.Traversal{ - hcl.TraverseRoot{Name: "var"}, - hcl.TraverseAttr{Name: name}, - }) -} - -type fieldTuple struct { - Field string - Schema *schema.Schema -} - -func (ic *importContext) dataToHcl(i importable, path []string, - pr *schema.Resource, res *resource, body *hclwrite.Body) error { - d := res.Data - ss := []fieldTuple{} - for a, as := range pr.Schema { - ss = append(ss, fieldTuple{a, as}) - } - sort.Slice(ss, func(i, j int) bool { - // it just happens that reverse field order - // makes the most beautiful configs - return ss[i].Field > ss[j].Field - }) - var_cnt := 0 - for _, tuple := range ss { - a, as := tuple.Field, tuple.Schema - pathString := strings.Join(append(path, a), ".") - raw, nonZero := d.GetOk(pathString) - // log.Printf("[DEBUG] path=%s, raw='%v'", pathString, raw) - if i.ShouldOmitField == nil { // we don't have custom function, so skip computed & default fields - if defaultShouldOmitFieldFunc(ic, pathString, as, d) { - continue - } - } else if i.ShouldOmitField(ic, pathString, as, d) { - continue - } - mpath := dependsRe.ReplaceAllString(pathString, "") - for _, ref := range i.Depends { - if ref.Path == mpath && ref.Variable { - // sensitive fields are moved to variable depends, variable name is normalized - // TODO: handle a case when we have multiple blocks, so names won't be unique - raw = ic.regexFix(ic.ResourceName(res), simpleNameFixes) - if var_cnt > 0 { - raw = fmt.Sprintf("%s_%d", raw, var_cnt) - } - nonZero = true - var_cnt++ - } - } - shouldSkip := !nonZero - if as.Required { // for required fields we must produce a value, even empty... 
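For fields marked as `Variable` dependencies, the generated configuration references `var.<field>_<normalized-resource-name>` and a matching block is later written to `vars.tf` by `generateVariables`. A rough sketch of the two outputs, with hypothetical names:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclwrite"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	// resource side: the sensitive value is replaced by a variable reference
	res := hclwrite.NewEmptyFile()
	res.Body().SetAttributeRaw("personal_access_token", hclwrite.TokensForTraversal(hcl.Traversal{
		hcl.TraverseRoot{Name: "var"},
		hcl.TraverseAttr{Name: "personal_access_token_my_git_credential"},
	}))

	// vars.tf side: a matching variable block is registered for later generation
	vars := hclwrite.NewEmptyFile()
	vars.Body().AppendNewBlock("variable", []string{"personal_access_token_my_git_credential"}).
		Body().SetAttributeValue("description", cty.StringVal(""))

	fmt.Print(string(res.Bytes()), string(vars.Bytes()))
}
```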
- shouldSkip = false - } else if as.Default != nil && !reflect.DeepEqual(raw, as.Default) { - // In case when have zero value, but there is non-zero default, we also need to produce it - shouldSkip = false - } - if shouldSkip && (i.ShouldGenerateField == nil || !i.ShouldGenerateField(ic, pathString, as, d)) { - continue - } - switch as.Type { - case schema.TypeString: - value := raw.(string) - tokens := ic.reference(i, append(path, a), value, cty.StringVal(value), res) - body.SetAttributeRaw(a, tokens) - case schema.TypeBool: - body.SetAttributeValue(a, cty.BoolVal(raw.(bool))) - case schema.TypeInt: - var num int64 - switch iv := raw.(type) { - case int: - num = int64(iv) - case int32: - num = int64(iv) - case int64: - num = iv - } - body.SetAttributeRaw(a, ic.reference(i, append(path, a), - strconv.FormatInt(num, 10), cty.NumberIntVal(num), res)) - case schema.TypeFloat: - body.SetAttributeValue(a, cty.NumberFloatVal(raw.(float64))) - case schema.TypeMap: - // TODO: Resolve references in maps as well, and also support different types inside map... - ov := map[string]cty.Value{} - for key, iv := range raw.(map[string]any) { - v := cty.StringVal(fmt.Sprintf("%v", iv)) - ov[key] = v - } - body.SetAttributeValue(a, cty.ObjectVal(ov)) - case schema.TypeSet: - if rawSet, ok := raw.(*schema.Set); ok { - rawList := rawSet.List() - err := ic.readListFromData(i, append(path, a), res, rawList, body, as, func(i int) string { - return strconv.Itoa(rawSet.F(rawList[i])) - }) - if err != nil { - return err - } - } - case schema.TypeList: - if rawList, ok := raw.([]any); ok { - err := ic.readListFromData(i, append(path, a), res, rawList, body, as, strconv.Itoa) - if err != nil { - return err - } - } - default: - return fmt.Errorf("unsupported schema type: %v", path) - } - } - // Generate `depends_on` only for top-level resource because `dataToHcl` is called recursively - if len(path) == 0 && len(res.DependsOn) > 0 { - notIgnoredResources := []*resource{} - for _, dr := range res.DependsOn { - dr := dr - if dr.Data == nil { - tdr := ic.Scope.FindById(dr.Resource, dr.ID) - if tdr == nil { - log.Printf("[WARN] can't find resource %s in scope", dr) - continue - } - dr = tdr - } - if ic.Importables[dr.Resource].Ignore == nil || !ic.Importables[dr.Resource].Ignore(ic, dr) { - found := false - for _, v := range notIgnoredResources { - if v.ID == dr.ID && v.Resource == dr.Resource { - found = true - break - } - } - if !found { - notIgnoredResources = append(notIgnoredResources, dr) - } - } - } - if len(notIgnoredResources) > 0 { - toks := hclwrite.Tokens{} - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenOBrack, - Bytes: []byte{'['}, - }) - for i, dr := range notIgnoredResources { - if i > 0 { - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenComma, - Bytes: []byte{','}, - }) - } - toks = append(toks, hclwrite.TokensForTraversal(hcl.Traversal{ - hcl.TraverseRoot{Name: dr.Resource}, - hcl.TraverseAttr{Name: ic.ResourceName(dr)}, - })...) 
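As an aside on the `schema.TypeMap` branch above: every value is stringified and emitted as a single `cty` object. A standalone equivalent (the `custom_tags` attribute is just an example):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2/hclwrite"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	raw := map[string]any{"ResourceClass": "Serverless", "cost-center": 1234}
	ov := map[string]cty.Value{}
	for key, iv := range raw {
		ov[key] = cty.StringVal(fmt.Sprintf("%v", iv)) // every value becomes a string
	}
	f := hclwrite.NewEmptyFile()
	f.Body().SetAttributeValue("custom_tags", cty.ObjectVal(ov))
	fmt.Print(string(f.Bytes()))
}
```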
- } - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenCBrack, - Bytes: []byte{']'}, - }) - body.SetAttributeRaw("depends_on", toks) - } - } - return nil -} - -func (ic *importContext) readListFromData(i importable, path []string, res *resource, - rawList []any, body *hclwrite.Body, as *schema.Schema, offsetConverter func(i int) string) error { - if len(rawList) == 0 { - return nil - } - name := path[len(path)-1] - switch elem := as.Elem.(type) { - case *schema.Resource: - if as.MaxItems == 1 { - nestedPath := append(path, offsetConverter(0)) - confBlock := body.AppendNewBlock(name, []string{}) - return ic.dataToHcl(i, nestedPath, elem, res, confBlock.Body()) - } - for offset := range rawList { - confBlock := body.AppendNewBlock(name, []string{}) - nestedPath := append(path, offsetConverter(offset)) - err := ic.dataToHcl(i, nestedPath, elem, res, confBlock.Body()) - if err != nil { - return err - } - } - case *schema.Schema: - toks := hclwrite.Tokens{} - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenOBrack, - Bytes: []byte{'['}, - }) - for _, raw := range rawList { - if len(toks) != 1 { - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenComma, - Bytes: []byte{','}, - }) - } - switch x := raw.(type) { - case string: - value := raw.(string) - toks = append(toks, ic.reference(i, path, value, cty.StringVal(value), res)...) - case int: - // probably we don't even use integer lists?... - toks = append(toks, hclwrite.TokensForValue( - cty.NumberIntVal(int64(x)))...) - default: - return fmt.Errorf("unsupported primitive list: %#v", path) - } - } - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenCBrack, - Bytes: []byte{']'}, - }) - body.SetAttributeRaw(name, toks) - } - return nil -} diff --git a/exporter/context_test.go b/exporter/context_test.go index 204cb2c68c..d27e35925e 100644 --- a/exporter/context_test.go +++ b/exporter/context_test.go @@ -378,7 +378,7 @@ func TestGenerateResourceIdForWsObject(t *testing.T) { Importables: resourcesMap, Resources: p.ResourcesMap, } - rid, rtype := ic.generateResourceIdForWsObject(workspace.ObjectStatus{ + rid, rtype := ic.generateResourceIdForWorkspaceObject(workspace.ObjectStatus{ ObjectID: 123, Path: "Test", ObjectType: "Unknown", @@ -386,7 +386,7 @@ func TestGenerateResourceIdForWsObject(t *testing.T) { assert.Empty(t, rid) assert.Empty(t, rtype) - rid, rtype = ic.generateResourceIdForWsObject(workspace.ObjectStatus{ + rid, rtype = ic.generateResourceIdForWorkspaceObject(workspace.ObjectStatus{ ObjectID: 123, Path: "/Users/user@domain.com/TestDir", ObjectType: workspace.Directory, @@ -394,7 +394,7 @@ func TestGenerateResourceIdForWsObject(t *testing.T) { assert.Equal(t, "databricks_directory.users_user_domain_com_testdir_123", rid) assert.Equal(t, "databricks_directory", rtype) - rid, rtype = ic.generateResourceIdForWsObject(workspace.ObjectStatus{ + rid, rtype = ic.generateResourceIdForWorkspaceObject(workspace.ObjectStatus{ ObjectID: 123, Path: "/Users/user@domain.com/Test File", ObjectType: workspace.File, @@ -402,7 +402,7 @@ func TestGenerateResourceIdForWsObject(t *testing.T) { assert.Equal(t, "databricks_workspace_file.users_user_domain_com_test_file_123", rid) assert.Equal(t, "databricks_workspace_file", rtype) - rid, rtype = ic.generateResourceIdForWsObject(workspace.ObjectStatus{ + rid, rtype = ic.generateResourceIdForWorkspaceObject(workspace.ObjectStatus{ ObjectID: 123, Path: "/Users/user@domain.com/Test Notebook", ObjectType: workspace.Notebook, diff --git a/exporter/util.go b/exporter/util.go 
index 6bd91871ef..4562e14405 100644 --- a/exporter/util.go +++ b/exporter/util.go @@ -8,49 +8,37 @@ import ( "os" "path" "reflect" - "regexp" "strconv" "strings" - "sync" - "sync/atomic" "time" "github.com/databricks/terraform-provider-databricks/aws" "github.com/databricks/terraform-provider-databricks/clusters" "github.com/databricks/terraform-provider-databricks/common" - "github.com/databricks/terraform-provider-databricks/jobs" - "github.com/databricks/terraform-provider-databricks/scim" "github.com/databricks/terraform-provider-databricks/storage" - "github.com/databricks/terraform-provider-databricks/workspace" "github.com/databricks/databricks-sdk-go/service/catalog" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/iam" - sdk_jobs "github.com/databricks/databricks-sdk-go/service/jobs" - - "golang.org/x/exp/slices" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" "github.com/hashicorp/terraform-plugin-sdk/v2/terraform" ) -func (ic *importContext) emitInitScripts(initScripts []compute.InitScriptInfo) { - for _, is := range initScripts { - if is.Dbfs != nil { - ic.Emit(&resource{ - Resource: "databricks_dbfs_file", - ID: is.Dbfs.Destination, - }) - } - if is.Workspace != nil { - ic.emitWorkspaceFileOrRepo(is.Workspace.Destination) - } - if is.Volumes != nil { - // TODO: we should emit allow list for init scripts as well - ic.emitIfVolumeFile(is.Volumes.Destination) - } +func (ic *importContext) isServiceEnabled(service string) bool { + _, exists := ic.services[service] + return exists +} + +func (ic *importContext) isServiceInListing(service string) bool { + _, exists := ic.listing[service] + return exists +} + +func (ic *importContext) MatchesName(n string) bool { + if ic.match == "" { + return true } + return strings.Contains(strings.ToLower(n), strings.ToLower(ic.match)) } func (ic *importContext) emitFilesFromSlice(slice []string) { @@ -69,588 +57,12 @@ func (ic *importContext) emitFilesFromMap(m map[string]string) { } } -func (ic *importContext) importCluster(c *compute.ClusterSpec) { - if c == nil { - return - } - if c.AwsAttributes != nil && c.AwsAttributes.InstanceProfileArn != "" { - ic.Emit(&resource{ - Resource: "databricks_instance_profile", - ID: c.AwsAttributes.InstanceProfileArn, - }) - } - if c.InstancePoolId != "" { - // set enable_elastic_disk to false, and remove aws/gcp/azure_attributes - ic.Emit(&resource{ - Resource: "databricks_instance_pool", - ID: c.InstancePoolId, - }) - } - if c.DriverInstancePoolId != "" { - ic.Emit(&resource{ - Resource: "databricks_instance_pool", - ID: c.DriverInstancePoolId, - }) - } - if c.PolicyId != "" { - ic.Emit(&resource{ - Resource: "databricks_cluster_policy", - ID: c.PolicyId, - }) - } - ic.emitInitScripts(c.InitScripts) - ic.emitSecretsFromSecretsPathMap(c.SparkConf) - ic.emitSecretsFromSecretsPathMap(c.SparkEnvVars) - ic.emitUserOrServicePrincipal(c.SingleUserName) -} - -func (ic *importContext) emitSecretsFromSecretPathString(v string) { - if res := secretPathRegex.FindStringSubmatch(v); res != nil { - ic.Emit(&resource{ - Resource: "databricks_secret_scope", - ID: res[1], - }) - } -} - -func (ic *importContext) emitSecretsFromSecretsPathMap(m map[string]string) { - for _, v := range m { - ic.emitSecretsFromSecretPathString(v) - } -} - -func (ic *importContext) emitListOfUsers(users []string) { - for _, user := range users { - if user != "" { - ic.Emit(&resource{ - Resource: "databricks_user", - Attribute: 
"user_name", - Value: user, - }) - } - } -} - -func (ic *importContext) emitUserOrServicePrincipal(userOrSPName string) { - if userOrSPName == "" || !ic.isServiceEnabled("users") { - return - } - // Cache check here to avoid emitting - ic.emittedUsersMutex.RLock() - _, exists := ic.emittedUsers[userOrSPName] - ic.emittedUsersMutex.RUnlock() - if exists { - // log.Printf("[DEBUG] user or SP %s already emitted...", userOrSPName) - return - } - if common.StringIsUUID(userOrSPName) { - user, err := ic.findSpnByAppID(userOrSPName, false) - if err != nil { - log.Printf("[ERROR] Can't find SP with application ID %s", userOrSPName) - ic.addIgnoredResource(fmt.Sprintf("databricks_service_principal. application_id=%s", userOrSPName)) - } else { - ic.Emit(&resource{ - Resource: "databricks_service_principal", - ID: user.ID, - }) - } - } else { - user, err := ic.findUserByName(strings.ToLower(userOrSPName), false) - if err != nil { - log.Printf("[ERROR] Can't find user with name %s", userOrSPName) - ic.addIgnoredResource(fmt.Sprintf("databricks_user. user_name=%s", userOrSPName)) - } else { - ic.Emit(&resource{ - Resource: "databricks_user", - ID: user.ID, - }) - } - } - ic.emittedUsersMutex.Lock() - ic.emittedUsers[userOrSPName] = struct{}{} - ic.emittedUsersMutex.Unlock() -} - -func getUserOrSpNameAndDirectory(path, prefix string) (string, string) { - if !strings.HasPrefix(path, prefix) { - return "", "" - } - pathLen := len(path) - prefixLen := len(prefix) - searchStart := prefixLen + 1 - if pathLen <= searchStart { - return "", "" - } - pos := strings.Index(path[searchStart:pathLen], "/") - if pos == -1 { // we have only user directory... - return path[searchStart:pathLen], path - } - return path[searchStart : pos+searchStart], path[0 : pos+searchStart] -} - -func (ic *importContext) emitUserOrServicePrincipalForPath(path, prefix string) { - userOrSpName, _ := getUserOrSpNameAndDirectory(path, prefix) - if userOrSpName != "" { - ic.emitUserOrServicePrincipal(userOrSpName) - } -} - -func (ic *importContext) IsUserOrServicePrincipalDirectory(path, prefix string, strict bool) bool { - userOrSPName, userDir := getUserOrSpNameAndDirectory(path, prefix) - if userOrSPName == "" { - return false - } - // strict mode means that it should be exactly user dir, maybe with trailing `/` - if strict && !(len(path) == len(userDir) || (len(path) == len(userDir)+1 && path[len(path)-1] == '/')) { - return false - } - ic.userOrSpDirectoriesMutex.RLock() - result, exists := ic.userOrSpDirectories[userDir] - ic.userOrSpDirectoriesMutex.RUnlock() - if exists { - // log.Printf("[DEBUG] Directory %s already checked. Result=%v", userDir, result) - return result - } - var err error - if common.StringIsUUID(userOrSPName) { - _, err = ic.findSpnByAppID(userOrSPName, true) - if err != nil { - ic.addIgnoredResource(fmt.Sprintf("databricks_service_principal. application_id=%s", userOrSPName)) - } - } else { - _, err = ic.findUserByName(strings.ToLower(userOrSPName), true) - if err != nil { - ic.addIgnoredResource(fmt.Sprintf("databricks_user. user_name=%s", userOrSPName)) - } - } - ic.userOrSpDirectoriesMutex.Lock() - ic.userOrSpDirectories[userDir] = (err == nil) - ic.userOrSpDirectoriesMutex.Unlock() - return err == nil -} - -func (ic *importContext) emitRepoByPath(path string) { - // Path to Repos objects consits of following parts: /Repos, folder, repository, path inside Repo. - // Because it starts with `/`, it will produce empty string as first element in the slice. 
- // And we're stopping splitting to avoid producing too many not necessary parts, so we have 5 parts only. - parts := strings.SplitN(path, "/", 5) - if len(parts) >= 4 { - ic.Emit(&resource{ - Resource: "databricks_repo", - Attribute: "path", - Value: strings.Join(parts[:4], "/"), - }) - } else { - log.Printf("[WARN] Incorrect Repos path") - } -} - -func (ic *importContext) emitWorkspaceFileOrRepo(path string) { - if strings.HasPrefix(path, "/Repos") { - ic.emitRepoByPath(path) - } else { - // TODO: wrap this into ic.shouldEmit... - // TODO: strip /Workspace prefix if it's provided - ic.Emit(&resource{ - Resource: "databricks_workspace_file", - ID: path, - }) - } -} - -func (ic *importContext) emitNotebookOrRepo(path string) { - if strings.HasPrefix(path, "/Repos") { - ic.emitRepoByPath(path) - } else { - // TODO: strip /Workspace prefix if it's provided - ic.maybeEmitWorkspaceObject("databricks_notebook", path, nil) - } -} - -func (ic *importContext) getAllDirectories() []workspace.ObjectStatus { - if len(ic.allDirectories) == 0 { - objects := ic.getAllWorkspaceObjects(nil) - ic.wsObjectsMutex.Lock() - defer ic.wsObjectsMutex.Unlock() - if len(ic.allDirectories) == 0 { - for _, v := range objects { - if v.ObjectType == workspace.Directory { - ic.allDirectories = append(ic.allDirectories, v) - } - } - } - } - return ic.allDirectories -} - -// TODO: Ignore databricks_automl as well? -var directoriesToIgnore = []string{".ide", ".bundle", "__pycache__"} - -// TODO: add ignoring directories of deleted users? This could potentially decrease the number of processed objects... -func excludeAuxiliaryDirectories(v workspace.ObjectStatus) bool { - if v.ObjectType != workspace.Directory { - return true - } - // TODO: rewrite to use suffix check, etc., instead of split and slice contains? - parts := strings.Split(v.Path, "/") - result := len(parts) > 1 && slices.Contains[[]string, string](directoriesToIgnore, parts[len(parts)-1]) - if result { - log.Printf("[DEBUG] Ignoring directory %s", v.Path) - } - return !result -} - -func (ic *importContext) getAllWorkspaceObjects(visitor func([]workspace.ObjectStatus)) []workspace.ObjectStatus { - ic.wsObjectsMutex.Lock() - defer ic.wsObjectsMutex.Unlock() - if len(ic.allWorkspaceObjects) == 0 { - t1 := time.Now() - log.Print("[INFO] Starting to list all workspace objects") - notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client) - ic.allWorkspaceObjects, _ = ListParallel(notebooksAPI, "/", excludeAuxiliaryDirectories, visitor) - log.Printf("[INFO] Finished listing of all workspace objects. %d objects in total. 
%v seconds", - len(ic.allWorkspaceObjects), time.Since(t1).Seconds()) - } - return ic.allWorkspaceObjects -} - -func (ic *importContext) emitGroups(u scim.User) { - for _, g := range u.Groups { - if g.Type != "direct" { - log.Printf("[DEBUG] Skipping non-direct group %s/%s for %s", g.Value, g.Display, u.DisplayName) - continue - } - ic.Emit(&resource{ - Resource: "databricks_group", - ID: g.Value, - }) - id := fmt.Sprintf("%s|%s", g.Value, u.ID) - ic.Emit(&resource{ - Resource: "databricks_group_member", - ID: id, - Name: fmt.Sprintf("%s_%s_%s_%s", g.Display, g.Value, u.DisplayName, u.ID), - Data: ic.makeGroupMemberData(id, g.Value, u.ID), - }) - } -} - -func (ic *importContext) emitRoles(objType string, id string, roles []scim.ComplexValue) { - log.Printf("[DEBUG] emitting roles for object type: %s, ID: %s, roles: %v", objType, id, roles) - for _, role := range roles { - if role.Type != "direct" { - continue - } - if !ic.accountLevel { - ic.Emit(&resource{ - Resource: "databricks_instance_profile", - ID: role.Value, - }) - } - ic.Emit(&resource{ - Resource: fmt.Sprintf("databricks_%s_role", objType), - ID: fmt.Sprintf("%s|%s", id, role.Value), - }) - } -} - -func (ic *importContext) emitLibraries(libs []compute.Library) { - for _, lib := range libs { - // Files on DBFS - ic.emitIfDbfsFile(lib.Whl) - ic.emitIfDbfsFile(lib.Jar) - ic.emitIfDbfsFile(lib.Egg) - // Files on WSFS - ic.emitIfWsfsFile(lib.Whl) - ic.emitIfWsfsFile(lib.Jar) - ic.emitIfWsfsFile(lib.Egg) - ic.emitIfWsfsFile(lib.Requirements) - // Files on UC Volumes - ic.emitIfVolumeFile(lib.Whl) - // TODO: we should emit UC allow list as well - ic.emitIfVolumeFile(lib.Jar) - ic.emitIfVolumeFile(lib.Requirements) - } -} - -func (ic *importContext) importLibraries(d *schema.ResourceData, s map[string]*schema.Schema) error { - var cll compute.InstallLibraries - common.DataToStructPointer(d, s, &cll) - ic.emitLibraries(cll.Libraries) - return nil -} - -func (ic *importContext) importClusterLibraries(d *schema.ResourceData, s map[string]*schema.Schema) error { - libraries := ic.workspaceClient.Libraries - cll, err := libraries.ClusterStatusByClusterId(ic.Context, d.Id()) - if err != nil { - return err - } - for _, lib := range cll.LibraryStatuses { - ic.emitIfDbfsFile(lib.Library.Egg) - ic.emitIfDbfsFile(lib.Library.Jar) - ic.emitIfDbfsFile(lib.Library.Whl) - // Files on UC Volumes - ic.emitIfVolumeFile(lib.Library.Whl) - ic.emitIfVolumeFile(lib.Library.Jar) - // Files on WSFS - ic.emitIfWsfsFile(lib.Library.Whl) - ic.emitIfWsfsFile(lib.Library.Jar) - } - return nil -} - -func (ic *importContext) cacheGroups() error { - ic.groupsMutex.Lock() - defer ic.groupsMutex.Unlock() - if ic.allGroups == nil { - log.Printf("[INFO] Caching groups in memory ...") - var groups *[]iam.Group - var err error - err = runWithRetries(func() error { - var grps []iam.Group - var err error - if ic.accountLevel { - grps, err = ic.accountClient.Groups.ListAll(ic.Context, iam.ListAccountGroupsRequest{ - Attributes: "id", - }) - } else { - grps, err = ic.workspaceClient.Groups.ListAll(ic.Context, iam.ListGroupsRequest{ - Attributes: "id", - }) - } - if err != nil { - return err - } - groups = &grps - return nil - }, "error fetching full list of groups") - if err != nil { - log.Printf("[ERROR] can't fetch list of groups. 
Error: %v", err) - return err - } - api := scim.NewGroupsAPI(ic.Context, ic.Client) - groupsCount := len(*groups) - ic.allGroups = make([]scim.Group, 0, groupsCount) - for i, g := range *groups { - err = runWithRetries(func() error { - group, err := api.Read(g.Id, "id,displayName,active,externalId,entitlements,groups,roles,members") - if err != nil { - return err - } - ic.allGroups = append(ic.allGroups, group) - return nil - }, "error reading group with ID "+g.Id) - if err != nil { - log.Printf("[ERROR] Error reading group with ID %s: %v", g.Id, err) - continue - } - if (i+1)%10 == 0 { - log.Printf("[DEBUG] Read %d out of %d groups", i+1, groupsCount) - } - } - log.Printf("[INFO] Cached %d groups", len(ic.allGroups)) - } - return nil -} - func (ic *importContext) addIgnoredResource(msg string) { ic.ignoredResourcesMutex.Lock() defer ic.ignoredResourcesMutex.Unlock() ic.ignoredResources[msg] = struct{}{} } -const ( - nonExistingUserOrSp = "__USER_OR_SPN_DOES_NOT_EXIST__" -) - -func (ic *importContext) getUsersMapping() { - ic.allUsersMutex.RLocker().Lock() - userMapping := ic.allUsersMapping - ic.allUsersMutex.RLocker().Unlock() - if userMapping == nil { - ic.allUsersMutex.Lock() - defer ic.allUsersMutex.Unlock() - if ic.allUsersMapping != nil { - return - } - ic.allUsersMapping = make(map[string]string) - err := runWithRetries(func() error { - var users []iam.User - var err error - if ic.accountLevel { - users, err = ic.accountClient.Users.ListAll(ic.Context, iam.ListAccountUsersRequest{ - Attributes: "id,userName", - }) - } else { - users, err = ic.workspaceClient.Users.ListAll(ic.Context, iam.ListUsersRequest{ - Attributes: "id,userName", - }) - } - if err != nil { - return err - } - for _, user := range users { - ic.allUsersMapping[user.UserName] = user.Id - } - log.Printf("[DEBUG] users are copied") - return nil - }, "error fetching full list of users") - if err != nil { - log.Fatalf("[ERROR] can't fetch list of users after few retries: error=%v", err) - } - } -} - -func (ic *importContext) findUserByName(name string, fastCheck bool) (u *scim.User, err error) { - log.Printf("[DEBUG] Looking for user %s", name) - ic.usersMutex.RLocker().Lock() - user, exists := ic.allUsers[name] - ic.usersMutex.RLocker().Unlock() - if exists { - if user.UserName == nonExistingUserOrSp { - log.Printf("[DEBUG] non-existing user %s is found in the cache", name) - err = fmt.Errorf("user %s is not found", name) - } else { - log.Printf("[DEBUG] existing user %s is found in the cache", name) - u = &user - } - return - } - ic.getUsersMapping() - ic.allUsersMutex.RLocker().Lock() - userId, exists := ic.allUsersMapping[name] - ic.allUsersMutex.RLocker().Unlock() - if !exists { - err = fmt.Errorf("there is no user '%s'", name) - u = &scim.User{UserName: nonExistingUserOrSp} - } else { - if fastCheck { - return &scim.User{UserName: name}, nil - } - a := scim.NewUsersAPI(ic.Context, ic.Client) - err = runWithRetries(func() error { - usr, err := a.Read(userId, "id,userName,displayName,active,externalId,entitlements,groups,roles") - if err != nil { - return err - } - u = &usr - return nil - }, fmt.Sprintf("error reading user with name '%s', user ID: %s", name, userId)) - if err != nil { - log.Printf("[WARN] error reading user with name '%s', user ID: %s", name, userId) - u = &scim.User{UserName: nonExistingUserOrSp} - } - } - ic.usersMutex.Lock() - defer ic.usersMutex.Unlock() - ic.allUsers[name] = *u - return -} - -func (ic *importContext) getSpsMapping() { - ic.spsMutex.Lock() - defer ic.spsMutex.Unlock() - if 
ic.allSpsMapping == nil { - ic.allSpsMapping = make(map[string]string) - err := runWithRetries(func() error { - var sps []iam.ServicePrincipal - var err error - if ic.accountLevel { - sps, err = ic.accountClient.ServicePrincipals.ListAll(ic.Context, iam.ListAccountServicePrincipalsRequest{ - Attributes: "id,userName", - }) - } else { - sps, err = ic.workspaceClient.ServicePrincipals.ListAll(ic.Context, iam.ListServicePrincipalsRequest{ - Attributes: "id,userName", - }) - } - if err != nil { - return err - } - for _, sp := range sps { - ic.allSpsMapping[sp.ApplicationId] = sp.Id - } - return nil - }, "error fetching full list of service principals") - if err != nil { - log.Fatalf("[ERROR] can't fetch list of service principals after few retries: error=%v", err) - } - } -} - -func (ic *importContext) getBuiltinPolicyFamilies() map[string]compute.PolicyFamily { - ic.builtInPoliciesMutex.Lock() - defer ic.builtInPoliciesMutex.Unlock() - if ic.builtInPolicies == nil { - if !ic.accountLevel { - log.Printf("[DEBUG] Going to initialize ic.builtInPolicies. Getting policy families...") - families, err := ic.workspaceClient.PolicyFamilies.ListAll(ic.Context, compute.ListPolicyFamiliesRequest{}) - log.Printf("[DEBUG] Going to initialize ic.builtInPolicies. Getting policy families...") - if err == nil { - ic.builtInPolicies = make(map[string]compute.PolicyFamily, len(families)) - for _, f := range families { - f2 := f - ic.builtInPolicies[f2.PolicyFamilyId] = f2 - } - } else { - log.Printf("[ERROR] Can't fetch cluster policy families: %v", err) - ic.builtInPolicies = map[string]compute.PolicyFamily{} - } - } else { - log.Print("[WARN] Can't list cluster policy families on account level") - ic.builtInPolicies = map[string]compute.PolicyFamily{} - } - } - return ic.builtInPolicies -} - -func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u *scim.User, err error) { - log.Printf("[DEBUG] Looking for SP %s", applicationID) - ic.spsMutex.RLocker().Lock() - sp, exists := ic.allSps[applicationID] - ic.spsMutex.RLocker().Unlock() - if exists { - if sp.ApplicationID == nonExistingUserOrSp { - log.Printf("[DEBUG] non-existing SP %s is found in the cache", applicationID) - err = fmt.Errorf("service principal %s is not found", applicationID) - } else { - log.Printf("[DEBUG] existing SP %s is found in the cache", applicationID) - u = &sp - } - return - } - ic.getSpsMapping() - ic.spsMutex.RLocker().Lock() - spId, exists := ic.allSpsMapping[applicationID] - ic.spsMutex.RLocker().Unlock() - if !exists { - err = fmt.Errorf("there is no service principal '%s'", applicationID) - u = &scim.User{ApplicationID: nonExistingUserOrSp} - } else { - if fastCheck { - return &scim.User{ApplicationID: applicationID}, nil - } - a := scim.NewServicePrincipalsAPI(ic.Context, ic.Client) - err = runWithRetries(func() error { - usr, err := a.Read(spId, "userName,displayName,active,externalId,entitlements,groups,roles") - if err != nil { - return err - } - u = &usr - return nil - }, fmt.Sprintf("error reading service principal with AppID '%s', SP ID: %s", applicationID, spId)) - if err != nil { - log.Printf("[WARN] error reading service principal with AppID '%s', SP ID: %s", applicationID, spId) - u = &scim.User{ApplicationID: nonExistingUserOrSp} - } - } - ic.spsMutex.Lock() - defer ic.spsMutex.Unlock() - ic.allSps[applicationID] = *u - - return -} - func (ic *importContext) emitIfDbfsFile(path string) { if strings.HasPrefix(path, "dbfs:") { if strings.HasPrefix(path, "dbfs:/Volumes/") { @@ -865,28 +277,6 @@ 
func eitherString(a any, b any) string { return "" } -func (ic *importContext) importJobs(l []jobs.Job) { - i := 0 - for offset, job := range l { - if !ic.MatchesName(job.Settings.Name) { - log.Printf("[INFO] Job name %s doesn't match selection %s", job.Settings.Name, ic.match) - continue - } - if job.Settings.Deployment != nil && job.Settings.Deployment.Kind == "BUNDLE" && - job.Settings.EditMode == "UI_LOCKED" { - log.Printf("[INFO] Skipping job '%s' because it's deployed by DABs", job.Settings.Name) - continue - } - ic.Emit(&resource{ - Resource: "databricks_job", - ID: job.ID(), - }) - i++ - log.Printf("[INFO] Scanned %d of total %d jobs", offset+1, len(l)) - } - log.Printf("[INFO] %d of total %d jobs are going to be imported", i, len(l)) -} - func (ic *importContext) createFileIn(dir, name string) (*os.File, string, error) { fileName := ic.prefix + name localFileName := fmt.Sprintf("%s/%s/%s", ic.Directory, dir, fileName) @@ -925,47 +315,6 @@ func defaultShouldOmitFieldFunc(ic *importContext, pathString string, as *schema return false } -func makeShouldOmitFieldForCluster(regex *regexp.Regexp) func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { - return func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { - prefix := "" - if regex != nil { - if res := regex.FindStringSubmatch(pathString); res != nil { - prefix = res[0] - } else { - return false - } - } - raw := d.Get(pathString) - if raw != nil { - v := reflect.ValueOf(raw) - if as.Optional && v.IsZero() { - return true - } - } - workerInstPoolID := d.Get(prefix + "instance_pool_id").(string) - switch pathString { - case prefix + "node_type_id": - return workerInstPoolID != "" - case prefix + "driver_node_type_id": - driverInstPoolID := d.Get(prefix + "driver_instance_pool_id").(string) - nodeTypeID := d.Get(prefix + "node_type_id").(string) - return workerInstPoolID != "" || driverInstPoolID != "" || raw.(string) == nodeTypeID - case prefix + "driver_instance_pool_id": - return raw.(string) == workerInstPoolID - case prefix + "enable_elastic_disk", prefix + "aws_attributes", prefix + "azure_attributes", prefix + "gcp_attributes": - return workerInstPoolID != "" - case prefix + "enable_local_disk_encryption": - return false - case prefix + "spark_conf": - return fmt.Sprintf("%v", d.Get(prefix+"spark_conf")) == "map[spark.databricks.delta.preview.enabled:true]" - case prefix + "spark_env_vars": - return fmt.Sprintf("%v", d.Get(prefix+"spark_env_vars")) == "map[PYSPARK_PYTHON:/databricks/python3/bin/python3]" - } - - return defaultShouldOmitFieldFunc(ic, pathString, as, d) - } -} - func resourceOrDataBlockBody(ic *importContext, body *hclwrite.Body, r *resource) error { blockType := "resource" if r.Mode == "data" { @@ -980,73 +329,6 @@ func generateUniqueID(v string) string { return fmt.Sprintf("%x", sha1.Sum([]byte(v)))[:10] } -func shouldOmitMd5Field(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { - if pathString == "md5" { // `md5` is kind of computed, but not declared as it... 
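`generateUniqueID` above is a plain truncated SHA-1 digest; the same ten-character suffix can be reproduced standalone:

```go
package main

import (
	"crypto/sha1"
	"fmt"
)

func main() {
	v := "/Users/user@domain.com/My Notebook"
	id := fmt.Sprintf("%x", sha1.Sum([]byte(v)))[:10]
	fmt.Println(id) // a stable 10-character suffix for generated resource names
}
```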
- return true - } - return defaultShouldOmitFieldFunc(ic, pathString, as, d) -} - -func workspaceObjectResouceName(ic *importContext, d *schema.ResourceData) string { - name := d.Get("path").(string) - if name == "" { - return d.Id() - } else { - name = nameNormalizationRegex.ReplaceAllString(name[1:], "_") + "_" + - strconv.FormatInt(int64(d.Get("object_id").(int)), 10) - } - return name -} - -func wsObjectGetModifiedAt(obs workspace.ObjectStatus) int64 { - if obs.ModifiedAtInteractive != nil && obs.ModifiedAtInteractive.TimeMillis != 0 { - return obs.ModifiedAtInteractive.TimeMillis - } - return obs.ModifiedAt -} - -func (ic *importContext) shouldEmitForPath(path string) bool { - if !ic.exportDeletedUsersAssets && strings.HasPrefix(path, "/Users/") { - return ic.IsUserOrServicePrincipalDirectory(path, "/Users", false) - } - return true -} - -func (ic *importContext) maybeEmitWorkspaceObject(resourceType, path string, obj *workspace.ObjectStatus) { - if ic.shouldEmitForPath(path) { - var data *schema.ResourceData - if obj != nil { - switch resourceType { - case "databricks_notebook": - data = workspace.ResourceNotebook().ToResource().TestResourceData() - case "databricks_workspace_file": - data = workspace.ResourceWorkspaceFile().ToResource().TestResourceData() - case "databricks_directory": - data = workspace.ResourceDirectory().ToResource().TestResourceData() - } - if data != nil { - scm := ic.Resources[resourceType].Schema - data.MarkNewResource() - data.SetId(path) - err := common.StructToData(obj, scm, data) - if err != nil { - log.Printf("[ERROR] can't convert %s object to data: %v. obj=%v", resourceType, err, obj) - data = nil - } - } - } - ic.Emit(&resource{ - Resource: resourceType, - ID: path, - Data: data, - Incremental: ic.incremental, - }) - } else { - log.Printf("[WARN] Not emitting a workspace object %s for deleted user. Path='%s'", resourceType, path) - ic.addIgnoredResource(fmt.Sprintf("%s. path=%s", resourceType, path)) - } -} - func (ic *importContext) enableServices(services string) { ic.services = map[string]struct{}{} for _, s := range strings.Split(services, ",") { @@ -1079,114 +361,6 @@ func (ic *importContext) emitSqlParentDirectory(parent string) { } } -func (ic *importContext) shouldSkipWorkspaceObject(object workspace.ObjectStatus, updatedSinceMs int64) bool { - if ic.incremental && object.ObjectType == workspace.Directory { - return true - } - if !(object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) || - strings.HasPrefix(object.Path, "/Repos") { - // log.Printf("[DEBUG] Skipping unsupported entry %v", object) - return true - } - if res := ignoreIdeFolderRegex.FindStringSubmatch(object.Path); res != nil { - return true - } - modifiedAt := wsObjectGetModifiedAt(object) - if ic.incremental && modifiedAt < updatedSinceMs { - p := ic.oldWorkspaceObjectMapping[object.ObjectID] - if p == "" || p == object.Path { - log.Printf("[DEBUG] skipping '%s' that was modified at %d (last active=%d)", - object.Path, modifiedAt, updatedSinceMs) - return true - } - log.Printf("[DEBUG] Different path for object %d. Old='%s', New='%s'", object.ObjectID, p, object.Path) - } - if !ic.MatchesName(object.Path) { - return true - } - return false -} - -func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) { - // check the size of the default channel, and add delays if it has less than %20 capacity left. - // In this case we won't need to have increase the size of the default channel to extended capacity. 
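The capacity check described in the comment above works against any buffered channel, since `len` and `cap` are cheap on channels. A small self-contained demo of the same back-off idea:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	ch := make(chan int, 10)
	done := make(chan struct{})
	go func() {
		for range ch { // slow consumer
			time.Sleep(5 * time.Millisecond)
		}
		close(done)
	}()
	for i := 0; i < 50; i++ {
		if float64(len(ch)) > float64(cap(ch))*0.8 {
			fmt.Printf("channel %d/%d full, backing off\n", len(ch), cap(ch))
			time.Sleep(20 * time.Millisecond)
		}
		ch <- i
	}
	close(ch)
	<-done
}
```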
- defChannelSize := len(ic.defaultChannel) - if float64(defChannelSize) > float64(ic.defaultHanlerChannelSize)*0.8 { - log.Printf("[DEBUG] waiting a bit before emitting a resource because default channel is 80%% full (%d): %v", - defChannelSize, object) - time.Sleep(1 * time.Second) - } - switch object.ObjectType { - case workspace.Notebook: - ic.maybeEmitWorkspaceObject("databricks_notebook", object.Path, &object) - case workspace.File: - ic.maybeEmitWorkspaceObject("databricks_workspace_file", object.Path, &object) - case workspace.Directory: - ic.maybeEmitWorkspaceObject("databricks_directory", object.Path, &object) - default: - log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path) - } -} - -func listNotebooksAndWorkspaceFiles(ic *importContext) error { - objectsChannel := make(chan workspace.ObjectStatus, defaultChannelSize) - numRoutines := 2 // TODO: make configurable? together with the channel size? - var processedObjects atomic.Uint64 - for i := 0; i < numRoutines; i++ { - num := i - ic.waitGroup.Add(1) - go func() { - log.Printf("[DEBUG] Starting channel %d for workspace objects", num) - for object := range objectsChannel { - processedObjects.Add(1) - ic.waitGroup.Add(1) - emitWorkpaceObject(ic, object) - ic.waitGroup.Done() - } - log.Printf("[DEBUG] channel %d for workspace objects is finished", num) - ic.waitGroup.Done() - }() - } - // There are two use cases - this function will handle listing, or it will receive listing - updatedSinceMs := ic.getUpdatedSinceMs() - allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) { - for _, object := range objects { - if object.ObjectType == workspace.Directory { - if !ic.incremental && object.Path != "/" && ic.isServiceInListing("directories") { - objectsChannel <- object - } - } else { - if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) { - continue - } - object := object - switch object.ObjectType { - case workspace.Notebook, workspace.File: - objectsChannel <- object - default: - log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path) - } - } - } - }) - close(objectsChannel) - log.Printf("[DEBUG] processedObjects=%d", processedObjects.Load()) - if processedObjects.Load() == 0 { // we didn't have side effect from listing as it was already happened - log.Printf("[DEBUG] ic.getAllWorkspaceObjects already was called before, so we need to explicitly submit all objects") - for _, object := range allObjects { - if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) { - continue - } - if object.ObjectType == workspace.Directory && !ic.incremental && ic.isServiceInListing("directories") && object.Path != "/" { - emitWorkpaceObject(ic, object) - } else if (object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) && ic.isServiceInListing("notebooks") { - emitWorkpaceObject(ic, object) - } - } - } - return nil -} - func (ic *importContext) getLastActiveMs() int64 { if ic.lastActiveMs == 0 { ic.lastActiveMs = (time.Now().Unix() - ic.lastActiveDays*24*60*60) * 1000 @@ -1213,106 +387,6 @@ func getEnvAsInt(envName string, defaultValue int) int { return defaultValue } -// Parallel listing implementation -type syncAnswer struct { - MU sync.Mutex - data []workspace.ObjectStatus -} - -func (a *syncAnswer) append(objs []workspace.ObjectStatus) { - a.MU.Lock() - a.data = append(a.data, objs...) 
- a.MU.Unlock() -} - -type directoryInfo struct { - Path string - Attempts int -} - -// constants related to the parallel listing -const ( - envVarListParallelism = "EXPORTER_WS_LIST_PARALLELISM" - envVarDirectoryChannelSize = "EXPORTER_DIRECTORIES_CHANNEL_SIZE" - defaultWorkersPoolSize = 10 - defaultDirectoryChannelSize = 100000 -) - -func recursiveAddPathsParallel(a workspace.NotebooksAPI, directory directoryInfo, dirChannel chan directoryInfo, - answer *syncAnswer, wg *sync.WaitGroup, shouldIncludeDir func(workspace.ObjectStatus) bool, visitor func([]workspace.ObjectStatus)) { - defer wg.Done() - notebookInfoList, err := a.ListInternalImpl(directory.Path) - if err != nil { - log.Printf("[WARN] error listing '%s': %v", directory.Path, err) - if isRetryableError(err.Error(), directory.Attempts) { - wg.Add(1) - log.Printf("[INFO] attempt %d of retrying listing of '%s' after error: %v", - directory.Attempts+1, directory.Path, err) - time.Sleep(time.Duration(retryDelaySeconds) * time.Second) - dirChannel <- directoryInfo{Path: directory.Path, Attempts: directory.Attempts + 1} - } - } - - newList := make([]workspace.ObjectStatus, 0, len(notebookInfoList)) - directories := make([]workspace.ObjectStatus, 0, len(notebookInfoList)) - for _, v := range notebookInfoList { - if v.ObjectType == workspace.Directory { - if shouldIncludeDir(v) { - newList = append(newList, v) - directories = append(directories, v) - } - } else { - newList = append(newList, v) - } - } - answer.append(newList) - for _, v := range directories { - wg.Add(1) - log.Printf("[DEBUG] putting directory '%s' into channel. Channel size: %d", v.Path, len(dirChannel)) - dirChannel <- directoryInfo{Path: v.Path} - time.Sleep(3 * time.Millisecond) - } - if visitor != nil { - visitor(newList) - } -} - -func ListParallel(a workspace.NotebooksAPI, path string, shouldIncludeDir func(workspace.ObjectStatus) bool, - visitor func([]workspace.ObjectStatus)) ([]workspace.ObjectStatus, error) { - var answer syncAnswer - wg := &sync.WaitGroup{} - - if shouldIncludeDir == nil { - shouldIncludeDir = func(workspace.ObjectStatus) bool { return true } - } - - numWorkers := getEnvAsInt(envVarListParallelism, defaultWorkersPoolSize) - channelSize := getEnvAsInt(envVarDirectoryChannelSize, defaultDirectoryChannelSize) - dirChannel := make(chan directoryInfo, channelSize) - for i := 0; i < numWorkers; i++ { - t := i - go func() { - log.Printf("[DEBUG] starting go routine %d", t) - for directory := range dirChannel { - log.Printf("[DEBUG] processing directory %s", directory.Path) - recursiveAddPathsParallel(a, directory, dirChannel, &answer, wg, shouldIncludeDir, visitor) - } - }() - - } - log.Print("[DEBUG] pushing initial path to channel") - wg.Add(1) - recursiveAddPathsParallel(a, directoryInfo{Path: path}, dirChannel, &answer, wg, shouldIncludeDir, visitor) - log.Print("[DEBUG] starting to wait") - wg.Wait() - log.Print("[DEBUG] closing the directory channel") - close(dirChannel) - - answer.MU.Lock() - defer answer.MU.Unlock() - return answer.data, nil -} - var ( maxRetries = 5 retryDelaySeconds = 2 @@ -1365,24 +439,16 @@ func appendEndingSlashToDirName(dir string) string { } func isMatchingCatalogAndSchema(ic *importContext, res *resource, ra *resourceApproximation, origPath string) bool { - // log.Printf("[DEBUG] matchingCatalogAndSchema: resource: %s, origPath=%s", res.Resource, origPath) res_catalog_name := res.Data.Get("catalog_name").(string) res_schema_name := res.Data.Get("schema_name").(string) - // log.Printf("[DEBUG] 
matchingCatalogAndSchema: resource: %s, catalog='%s' schema='%s'", - // res.Resource, res_catalog_name, res_schema_name) ra_catalog_name, cat_found := ra.Get("catalog_name") ra_schema_name, schema_found := ra.Get("name") - // log.Printf("[DEBUG] matchingCatalogAndSchema: approximation: %s %s, catalog='%v' (found? %v) schema='%v' (found? %v)", - // ra.Type, ra.Name, ra_catalog_name, cat_found, ra_schema_name, schema_found) if !cat_found || !schema_found { log.Printf("[WARN] Can't find attributes in approximation: %s %s, catalog='%v' (found? %v) schema='%v' (found? %v). Resource: %s, catalog='%s', schema='%s'", ra.Type, ra.Name, ra_catalog_name, cat_found, ra_schema_name, schema_found, res.Resource, res_catalog_name, res_schema_name) return true } - result := ra_catalog_name.(string) == res_catalog_name && ra_schema_name.(string) == res_schema_name - // log.Printf("[DEBUG] matchingCatalogAndSchema: result: %v approximation: catalog='%v' schema='%v', res: catalog='%s' schema='%s'", - // result, ra_catalog_name, ra_schema_name, res_catalog_name, res_schema_name) return result } @@ -1403,10 +469,6 @@ func isMatchingCatalogAndSchemaInModelServing(ic *importContext, res *resource, func isMatchingShareRecipient(ic *importContext, res *resource, ra *resourceApproximation, origPath string) bool { shareName, ok := res.Data.GetOk("share") - // principal := res.Data.Get(origPath) - // log.Printf("[DEBUG] isMatchingShareRecipient: origPath='%s', ra.Type='%s', shareName='%v', ok? %v, principal='%v'", - // origPath, ra.Type, shareName, ok, principal) - return ok && shareName.(string) != "" } @@ -1414,10 +476,6 @@ func isMatchignShareObject(obj string) isValidAproximationFunc { return func(ic *importContext, res *resource, ra *resourceApproximation, origPath string) bool { objPath := strings.Replace(origPath, ".name", ".data_object_type", 1) objType, ok := res.Data.GetOk(objPath) - // name := res.Data.Get(origPath) - // log.Printf("[DEBUG] isMatchignShareObject: %s origPath='%s', ra.Type='%s', name='%v', objPath='%s' objType='%v' ok? %v", - // obj, origPath, ra.Type, name, objPath, objType, ok) - return ok && objType.(string) == obj } } @@ -1426,10 +484,6 @@ func isMatchingAllowListArtifact(ic *importContext, res *resource, ra *resourceA objPath := strings.Replace(origPath, ".artifact", ".match_type", 1) matchType, ok := res.Data.GetOk(objPath) artifactType := res.Data.Get("artifact_type").(string) - // artifact := res.Data.Get(origPath) - // log.Printf("[DEBUG] isMatchingAllowListArtifact: origPath='%s', ra.Type='%s', artifactType='%v' artifact='%v', objPath='%s' matchType='%v' ok? %v", - // origPath, ra.Type, artifactType, artifact, objPath, matchType, ok) - return ok && matchType.(string) == "PREFIX_MATCH" && (artifactType == "LIBRARY_JAR" || artifactType == "INIT_SCRIPT") } @@ -1483,20 +537,6 @@ func (ic *importContext) emitPermissionsIfNotIgnored(r *resource, id, name strin } } -func (ic *importContext) emitWorkspaceObjectParentDirectory(r *resource) { - if !ic.isServiceEnabled("directories") { - return - } - if idx := strings.LastIndex(r.ID, "/"); idx > 0 { // not found, or directly in the root... 
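`ListParallel`/`recursiveAddPathsParallel` (earlier in this diff) implement a parallel tree walk: workers share one directory channel, `wg.Add` happens before every enqueue so `Wait` cannot return early, and results are appended under a mutex. A simplified sketch under those assumptions, using the local filesystem in place of the workspace API (the real code uses a much larger channel buffer and per-directory retries):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"sync"
)

func main() {
	dirs := make(chan string, 1024)
	var mu sync.Mutex
	var files []string
	var wg sync.WaitGroup

	worker := func() {
		for d := range dirs {
			if entries, err := os.ReadDir(d); err == nil {
				for _, e := range entries {
					p := filepath.Join(d, e.Name())
					if e.IsDir() {
						wg.Add(1) // count the subdirectory before enqueueing it
						dirs <- p
					} else {
						mu.Lock()
						files = append(files, p)
						mu.Unlock()
					}
				}
			}
			wg.Done() // this directory is fully processed
		}
	}
	for i := 0; i < 4; i++ {
		go worker()
	}
	wg.Add(1)
	dirs <- "."
	wg.Wait()   // zero only after every queued directory was processed
	close(dirs) // release the idle workers
	fmt.Println(len(files), "files found")
}
```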
- directoryPath := r.ID[:idx] - ic.Emit(&resource{ - Resource: "databricks_directory", - ID: directoryPath, - }) - r.AddExtraData(ParentDirectoryExtraKey, directoryPath) - } -} - func dltIsMatchingCatalogAndSchema(ic *importContext, res *resource, ra *resourceApproximation, origPath string) bool { res_catalog_name := res.Data.Get("catalog").(string) if res_catalog_name == "" { @@ -1515,15 +555,6 @@ func dltIsMatchingCatalogAndSchema(ic *importContext, res *resource, ra *resourc return result } -func (ic *importContext) makeGroupMemberData(id, groupId, memberId string) *schema.ResourceData { - data := scim.ResourceGroupMember().ToResource().TestResourceData() - data.MarkNewResource() - data.SetId(id) - data.Set("group_id", groupId) - data.Set("member_id", memberId) - return data -} - func (ic *importContext) emitWorkspaceBindings(securableType, securableName string) { bindings, err := ic.workspaceClient.WorkspaceBindings.GetBindings(ic.Context, catalog.GetBindingsRequest{ SecurableName: securableName, @@ -1563,12 +594,3 @@ func isMatchingSecurableTypeAndName(ic *importContext, res *resource, ra *resour ra_name, _ := ra.Get("name") return ra.Type == ("databricks_"+res_securable_type) && ra_name.(string) == res_securable_name } - -func (ic *importContext) emitJobsDestinationNotifications(notifications []sdk_jobs.Webhook) { - for _, notification := range notifications { - ic.Emit(&resource{ - Resource: "databricks_notification_destination", - ID: notification.Id, - }) - } -} diff --git a/exporter/util_compute.go b/exporter/util_compute.go new file mode 100644 index 0000000000..7967b9ebc9 --- /dev/null +++ b/exporter/util_compute.go @@ -0,0 +1,228 @@ +package exporter + +import ( + "fmt" + "log" + "reflect" + "regexp" + + "github.com/databricks/terraform-provider-databricks/common" + "github.com/databricks/terraform-provider-databricks/jobs" + + "github.com/databricks/databricks-sdk-go/service/compute" + sdk_jobs "github.com/databricks/databricks-sdk-go/service/jobs" + + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" +) + +func (ic *importContext) emitInitScripts(initScripts []compute.InitScriptInfo) { + for _, is := range initScripts { + if is.Dbfs != nil { + ic.Emit(&resource{ + Resource: "databricks_dbfs_file", + ID: is.Dbfs.Destination, + }) + } + if is.Workspace != nil { + ic.emitWorkspaceFileOrRepo(is.Workspace.Destination) + } + if is.Volumes != nil { + // TODO: we should emit allow list for init scripts as well + ic.emitIfVolumeFile(is.Volumes.Destination) + } + } +} + +func (ic *importContext) importCluster(c *compute.ClusterSpec) { + if c == nil { + return + } + if c.AwsAttributes != nil && c.AwsAttributes.InstanceProfileArn != "" { + ic.Emit(&resource{ + Resource: "databricks_instance_profile", + ID: c.AwsAttributes.InstanceProfileArn, + }) + } + if c.InstancePoolId != "" { + // set enable_elastic_disk to false, and remove aws/gcp/azure_attributes + ic.Emit(&resource{ + Resource: "databricks_instance_pool", + ID: c.InstancePoolId, + }) + } + if c.DriverInstancePoolId != "" { + ic.Emit(&resource{ + Resource: "databricks_instance_pool", + ID: c.DriverInstancePoolId, + }) + } + if c.PolicyId != "" { + ic.Emit(&resource{ + Resource: "databricks_cluster_policy", + ID: c.PolicyId, + }) + } + ic.emitInitScripts(c.InitScripts) + ic.emitSecretsFromSecretsPathMap(c.SparkConf) + ic.emitSecretsFromSecretsPathMap(c.SparkEnvVars) + ic.emitUserOrServicePrincipal(c.SingleUserName) +} + +func (ic *importContext) emitSecretsFromSecretPathString(v string) { + if res := 
secretPathRegex.FindStringSubmatch(v); res != nil { + ic.Emit(&resource{ + Resource: "databricks_secret_scope", + ID: res[1], + }) + } +} + +func (ic *importContext) emitSecretsFromSecretsPathMap(m map[string]string) { + for _, v := range m { + ic.emitSecretsFromSecretPathString(v) + } +} + +func (ic *importContext) emitLibraries(libs []compute.Library) { + for _, lib := range libs { + // Files on DBFS + ic.emitIfDbfsFile(lib.Whl) + ic.emitIfDbfsFile(lib.Jar) + ic.emitIfDbfsFile(lib.Egg) + // Files on WSFS + ic.emitIfWsfsFile(lib.Whl) + ic.emitIfWsfsFile(lib.Jar) + ic.emitIfWsfsFile(lib.Egg) + ic.emitIfWsfsFile(lib.Requirements) + // Files on UC Volumes + ic.emitIfVolumeFile(lib.Whl) + // TODO: we should emit UC allow list as well + ic.emitIfVolumeFile(lib.Jar) + ic.emitIfVolumeFile(lib.Requirements) + } +} + +func (ic *importContext) importLibraries(d *schema.ResourceData, s map[string]*schema.Schema) error { + var cll compute.InstallLibraries + common.DataToStructPointer(d, s, &cll) + ic.emitLibraries(cll.Libraries) + return nil +} + +func (ic *importContext) importClusterLibraries(d *schema.ResourceData, s map[string]*schema.Schema) error { + libraries := ic.workspaceClient.Libraries + cll, err := libraries.ClusterStatusByClusterId(ic.Context, d.Id()) + if err != nil { + return err + } + for _, lib := range cll.LibraryStatuses { + ic.emitIfDbfsFile(lib.Library.Egg) + ic.emitIfDbfsFile(lib.Library.Jar) + ic.emitIfDbfsFile(lib.Library.Whl) + // Files on UC Volumes + ic.emitIfVolumeFile(lib.Library.Whl) + ic.emitIfVolumeFile(lib.Library.Jar) + // Files on WSFS + ic.emitIfWsfsFile(lib.Library.Whl) + ic.emitIfWsfsFile(lib.Library.Jar) + } + return nil +} + +func (ic *importContext) getBuiltinPolicyFamilies() map[string]compute.PolicyFamily { + ic.builtInPoliciesMutex.Lock() + defer ic.builtInPoliciesMutex.Unlock() + if ic.builtInPolicies == nil { + if !ic.accountLevel { + log.Printf("[DEBUG] Going to initialize ic.builtInPolicies. Getting policy families...") + families, err := ic.workspaceClient.PolicyFamilies.ListAll(ic.Context, compute.ListPolicyFamiliesRequest{}) + log.Printf("[DEBUG] Going to initialize ic.builtInPolicies. 
Getting policy families...") + if err == nil { + ic.builtInPolicies = make(map[string]compute.PolicyFamily, len(families)) + for _, f := range families { + f2 := f + ic.builtInPolicies[f2.PolicyFamilyId] = f2 + } + } else { + log.Printf("[ERROR] Can't fetch cluster policy families: %v", err) + ic.builtInPolicies = map[string]compute.PolicyFamily{} + } + } else { + log.Print("[WARN] Can't list cluster policy families on account level") + ic.builtInPolicies = map[string]compute.PolicyFamily{} + } + } + return ic.builtInPolicies +} + +func (ic *importContext) importJobs(l []jobs.Job) { + i := 0 + for offset, job := range l { + if !ic.MatchesName(job.Settings.Name) { + log.Printf("[INFO] Job name %s doesn't match selection %s", job.Settings.Name, ic.match) + continue + } + if job.Settings.Deployment != nil && job.Settings.Deployment.Kind == "BUNDLE" && + job.Settings.EditMode == "UI_LOCKED" { + log.Printf("[INFO] Skipping job '%s' because it's deployed by DABs", job.Settings.Name) + continue + } + ic.Emit(&resource{ + Resource: "databricks_job", + ID: job.ID(), + }) + i++ + log.Printf("[INFO] Scanned %d of total %d jobs", offset+1, len(l)) + } + log.Printf("[INFO] %d of total %d jobs are going to be imported", i, len(l)) +} + +func makeShouldOmitFieldForCluster(regex *regexp.Regexp) func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { + return func(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { + prefix := "" + if regex != nil { + if res := regex.FindStringSubmatch(pathString); res != nil { + prefix = res[0] + } else { + return false + } + } + raw := d.Get(pathString) + if raw != nil { + v := reflect.ValueOf(raw) + if as.Optional && v.IsZero() { + return true + } + } + workerInstPoolID := d.Get(prefix + "instance_pool_id").(string) + switch pathString { + case prefix + "node_type_id": + return workerInstPoolID != "" + case prefix + "driver_node_type_id": + driverInstPoolID := d.Get(prefix + "driver_instance_pool_id").(string) + nodeTypeID := d.Get(prefix + "node_type_id").(string) + return workerInstPoolID != "" || driverInstPoolID != "" || raw.(string) == nodeTypeID + case prefix + "driver_instance_pool_id": + return raw.(string) == workerInstPoolID + case prefix + "enable_elastic_disk", prefix + "aws_attributes", prefix + "azure_attributes", prefix + "gcp_attributes": + return workerInstPoolID != "" + case prefix + "enable_local_disk_encryption": + return false + case prefix + "spark_conf": + return fmt.Sprintf("%v", d.Get(prefix+"spark_conf")) == "map[spark.databricks.delta.preview.enabled:true]" + case prefix + "spark_env_vars": + return fmt.Sprintf("%v", d.Get(prefix+"spark_env_vars")) == "map[PYSPARK_PYTHON:/databricks/python3/bin/python3]" + } + + return defaultShouldOmitFieldFunc(ic, pathString, as, d) + } +} + +func (ic *importContext) emitJobsDestinationNotifications(notifications []sdk_jobs.Webhook) { + for _, notification := range notifications { + ic.Emit(&resource{ + Resource: "databricks_notification_destination", + ID: notification.Id, + }) + } +} diff --git a/exporter/util_scim.go b/exporter/util_scim.go new file mode 100644 index 0000000000..8887023400 --- /dev/null +++ b/exporter/util_scim.go @@ -0,0 +1,392 @@ +package exporter + +import ( + "fmt" + "log" + "strings" + + "github.com/databricks/terraform-provider-databricks/common" + "github.com/databricks/terraform-provider-databricks/scim" + + "github.com/databricks/databricks-sdk-go/service/iam" + + 
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" +) + +const ( + nonExistingUserOrSp = "__USER_OR_SPN_DOES_NOT_EXIST__" +) + +func (ic *importContext) emitListOfUsers(users []string) { + for _, user := range users { + if user != "" { + ic.Emit(&resource{ + Resource: "databricks_user", + Attribute: "user_name", + Value: user, + }) + } + } +} + +func (ic *importContext) emitUserOrServicePrincipal(userOrSPName string) { + if userOrSPName == "" || !ic.isServiceEnabled("users") { + return + } + // Cache check here to avoid emitting + ic.emittedUsersMutex.RLock() + _, exists := ic.emittedUsers[userOrSPName] + ic.emittedUsersMutex.RUnlock() + if exists { + // log.Printf("[DEBUG] user or SP %s already emitted...", userOrSPName) + return + } + if common.StringIsUUID(userOrSPName) { + user, err := ic.findSpnByAppID(userOrSPName, false) + if err != nil { + log.Printf("[ERROR] Can't find SP with application ID %s", userOrSPName) + ic.addIgnoredResource(fmt.Sprintf("databricks_service_principal. application_id=%s", userOrSPName)) + } else { + ic.Emit(&resource{ + Resource: "databricks_service_principal", + ID: user.ID, + }) + } + } else { + user, err := ic.findUserByName(strings.ToLower(userOrSPName), false) + if err != nil { + log.Printf("[ERROR] Can't find user with name %s", userOrSPName) + ic.addIgnoredResource(fmt.Sprintf("databricks_user. user_name=%s", userOrSPName)) + } else { + ic.Emit(&resource{ + Resource: "databricks_user", + ID: user.ID, + }) + } + } + ic.emittedUsersMutex.Lock() + ic.emittedUsers[userOrSPName] = struct{}{} + ic.emittedUsersMutex.Unlock() +} + +func getUserOrSpNameAndDirectory(path, prefix string) (string, string) { + if !strings.HasPrefix(path, prefix) { + return "", "" + } + pathLen := len(path) + prefixLen := len(prefix) + searchStart := prefixLen + 1 + if pathLen <= searchStart { + return "", "" + } + pos := strings.Index(path[searchStart:pathLen], "/") + if pos == -1 { // we have only user directory... + return path[searchStart:pathLen], path + } + return path[searchStart : pos+searchStart], path[0 : pos+searchStart] +} + +func (ic *importContext) emitUserOrServicePrincipalForPath(path, prefix string) { + userOrSpName, _ := getUserOrSpNameAndDirectory(path, prefix) + if userOrSpName != "" { + ic.emitUserOrServicePrincipal(userOrSpName) + } +} + +func (ic *importContext) IsUserOrServicePrincipalDirectory(path, prefix string, strict bool) bool { + userOrSPName, userDir := getUserOrSpNameAndDirectory(path, prefix) + if userOrSPName == "" { + return false + } + // strict mode means that it should be exactly user dir, maybe with trailing `/` + if strict && !(len(path) == len(userDir) || (len(path) == len(userDir)+1 && path[len(path)-1] == '/')) { + return false + } + ic.userOrSpDirectoriesMutex.RLock() + result, exists := ic.userOrSpDirectories[userDir] + ic.userOrSpDirectoriesMutex.RUnlock() + if exists { + // log.Printf("[DEBUG] Directory %s already checked. Result=%v", userDir, result) + return result + } + var err error + if common.StringIsUUID(userOrSPName) { + _, err = ic.findSpnByAppID(userOrSPName, true) + if err != nil { + ic.addIgnoredResource(fmt.Sprintf("databricks_service_principal. application_id=%s", userOrSPName)) + } + } else { + _, err = ic.findUserByName(strings.ToLower(userOrSPName), true) + if err != nil { + ic.addIgnoredResource(fmt.Sprintf("databricks_user. 
user_name=%s", userOrSPName)) + } + } + ic.userOrSpDirectoriesMutex.Lock() + ic.userOrSpDirectories[userDir] = (err == nil) + ic.userOrSpDirectoriesMutex.Unlock() + return err == nil +} + +func (ic *importContext) emitGroups(u scim.User) { + for _, g := range u.Groups { + if g.Type != "direct" { + log.Printf("[DEBUG] Skipping non-direct group %s/%s for %s", g.Value, g.Display, u.DisplayName) + continue + } + ic.Emit(&resource{ + Resource: "databricks_group", + ID: g.Value, + }) + id := fmt.Sprintf("%s|%s", g.Value, u.ID) + ic.Emit(&resource{ + Resource: "databricks_group_member", + ID: id, + Name: fmt.Sprintf("%s_%s_%s_%s", g.Display, g.Value, u.DisplayName, u.ID), + Data: ic.makeGroupMemberData(id, g.Value, u.ID), + }) + } +} + +func (ic *importContext) emitRoles(objType string, id string, roles []scim.ComplexValue) { + log.Printf("[DEBUG] emitting roles for object type: %s, ID: %s, roles: %v", objType, id, roles) + for _, role := range roles { + if role.Type != "direct" { + continue + } + if !ic.accountLevel { + ic.Emit(&resource{ + Resource: "databricks_instance_profile", + ID: role.Value, + }) + } + ic.Emit(&resource{ + Resource: fmt.Sprintf("databricks_%s_role", objType), + ID: fmt.Sprintf("%s|%s", id, role.Value), + }) + } +} + +func (ic *importContext) cacheGroups() error { + ic.groupsMutex.Lock() + defer ic.groupsMutex.Unlock() + if ic.allGroups == nil { + log.Printf("[INFO] Caching groups in memory ...") + var groups *[]iam.Group + var err error + err = runWithRetries(func() error { + var grps []iam.Group + var err error + if ic.accountLevel { + grps, err = ic.accountClient.Groups.ListAll(ic.Context, iam.ListAccountGroupsRequest{ + Attributes: "id", + }) + } else { + grps, err = ic.workspaceClient.Groups.ListAll(ic.Context, iam.ListGroupsRequest{ + Attributes: "id", + }) + } + if err != nil { + return err + } + groups = &grps + return nil + }, "error fetching full list of groups") + if err != nil { + log.Printf("[ERROR] can't fetch list of groups. 
Error: %v", err) + return err + } + api := scim.NewGroupsAPI(ic.Context, ic.Client) + groupsCount := len(*groups) + ic.allGroups = make([]scim.Group, 0, groupsCount) + for i, g := range *groups { + err = runWithRetries(func() error { + group, err := api.Read(g.Id, "id,displayName,active,externalId,entitlements,groups,roles,members") + if err != nil { + return err + } + ic.allGroups = append(ic.allGroups, group) + return nil + }, "error reading group with ID "+g.Id) + if err != nil { + log.Printf("[ERROR] Error reading group with ID %s: %v", g.Id, err) + continue + } + if (i+1)%10 == 0 { + log.Printf("[DEBUG] Read %d out of %d groups", i+1, groupsCount) + } + } + log.Printf("[INFO] Cached %d groups", len(ic.allGroups)) + } + return nil +} + +func (ic *importContext) getUsersMapping() { + ic.allUsersMutex.RLocker().Lock() + userMapping := ic.allUsersMapping + ic.allUsersMutex.RLocker().Unlock() + if userMapping == nil { + ic.allUsersMutex.Lock() + defer ic.allUsersMutex.Unlock() + if ic.allUsersMapping != nil { + return + } + ic.allUsersMapping = make(map[string]string) + err := runWithRetries(func() error { + var users []iam.User + var err error + if ic.accountLevel { + users, err = ic.accountClient.Users.ListAll(ic.Context, iam.ListAccountUsersRequest{ + Attributes: "id,userName", + }) + } else { + users, err = ic.workspaceClient.Users.ListAll(ic.Context, iam.ListUsersRequest{ + Attributes: "id,userName", + }) + } + if err != nil { + return err + } + for _, user := range users { + ic.allUsersMapping[user.UserName] = user.Id + } + log.Printf("[DEBUG] users are copied") + return nil + }, "error fetching full list of users") + if err != nil { + log.Fatalf("[ERROR] can't fetch list of users after few retries: error=%v", err) + } + } +} + +func (ic *importContext) findUserByName(name string, fastCheck bool) (u *scim.User, err error) { + log.Printf("[DEBUG] Looking for user %s", name) + ic.usersMutex.RLocker().Lock() + user, exists := ic.allUsers[name] + ic.usersMutex.RLocker().Unlock() + if exists { + if user.UserName == nonExistingUserOrSp { + log.Printf("[DEBUG] non-existing user %s is found in the cache", name) + err = fmt.Errorf("user %s is not found", name) + } else { + log.Printf("[DEBUG] existing user %s is found in the cache", name) + u = &user + } + return + } + ic.getUsersMapping() + ic.allUsersMutex.RLocker().Lock() + userId, exists := ic.allUsersMapping[name] + ic.allUsersMutex.RLocker().Unlock() + if !exists { + err = fmt.Errorf("there is no user '%s'", name) + u = &scim.User{UserName: nonExistingUserOrSp} + } else { + if fastCheck { + return &scim.User{UserName: name}, nil + } + a := scim.NewUsersAPI(ic.Context, ic.Client) + err = runWithRetries(func() error { + usr, err := a.Read(userId, "id,userName,displayName,active,externalId,entitlements,groups,roles") + if err != nil { + return err + } + u = &usr + return nil + }, fmt.Sprintf("error reading user with name '%s', user ID: %s", name, userId)) + if err != nil { + log.Printf("[WARN] error reading user with name '%s', user ID: %s", name, userId) + u = &scim.User{UserName: nonExistingUserOrSp} + } + } + ic.usersMutex.Lock() + defer ic.usersMutex.Unlock() + ic.allUsers[name] = *u + return +} + +func (ic *importContext) getSpsMapping() { + ic.spsMutex.Lock() + defer ic.spsMutex.Unlock() + if ic.allSpsMapping == nil { + ic.allSpsMapping = make(map[string]string) + err := runWithRetries(func() error { + var sps []iam.ServicePrincipal + var err error + if ic.accountLevel { + sps, err = 
ic.accountClient.ServicePrincipals.ListAll(ic.Context, iam.ListAccountServicePrincipalsRequest{
+				Attributes: "id,userName",
+			})
+		} else {
+			sps, err = ic.workspaceClient.ServicePrincipals.ListAll(ic.Context, iam.ListServicePrincipalsRequest{
+				Attributes: "id,userName",
+			})
+		}
+		if err != nil {
+			return err
+		}
+		for _, sp := range sps {
+			ic.allSpsMapping[sp.ApplicationId] = sp.Id
+		}
+		return nil
+	}, "error fetching full list of service principals")
+		if err != nil {
+			log.Fatalf("[ERROR] can't fetch list of service principals after few retries: error=%v", err)
+		}
+	}
+}
+
+func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u *scim.User, err error) {
+	log.Printf("[DEBUG] Looking for SP %s", applicationID)
+	ic.spsMutex.RLocker().Lock()
+	sp, exists := ic.allSps[applicationID]
+	ic.spsMutex.RLocker().Unlock()
+	if exists {
+		if sp.ApplicationID == nonExistingUserOrSp {
+			log.Printf("[DEBUG] non-existing SP %s is found in the cache", applicationID)
+			err = fmt.Errorf("service principal %s is not found", applicationID)
+		} else {
+			log.Printf("[DEBUG] existing SP %s is found in the cache", applicationID)
+			u = &sp
+		}
+		return
+	}
+	ic.getSpsMapping()
+	ic.spsMutex.RLocker().Lock()
+	spId, exists := ic.allSpsMapping[applicationID]
+	ic.spsMutex.RLocker().Unlock()
+	if !exists {
+		err = fmt.Errorf("there is no service principal '%s'", applicationID)
+		u = &scim.User{ApplicationID: nonExistingUserOrSp}
+	} else {
+		if fastCheck {
+			return &scim.User{ApplicationID: applicationID}, nil
+		}
+		a := scim.NewServicePrincipalsAPI(ic.Context, ic.Client)
+		err = runWithRetries(func() error {
+			usr, err := a.Read(spId, "userName,displayName,active,externalId,entitlements,groups,roles")
+			if err != nil {
+				return err
+			}
+			u = &usr
+			return nil
+		}, fmt.Sprintf("error reading service principal with AppID '%s', SP ID: %s", applicationID, spId))
+		if err != nil {
+			log.Printf("[WARN] error reading service principal with AppID '%s', SP ID: %s", applicationID, spId)
+			u = &scim.User{ApplicationID: nonExistingUserOrSp}
+		}
+	}
+	ic.spsMutex.Lock()
+	defer ic.spsMutex.Unlock()
+	ic.allSps[applicationID] = *u
+
+	return
+}
+
+func (ic *importContext) makeGroupMemberData(id, groupId, memberId string) *schema.ResourceData {
+	data := scim.ResourceGroupMember().ToResource().TestResourceData()
+	data.MarkNewResource()
+	data.SetId(id)
+	data.Set("group_id", groupId)
+	data.Set("member_id", memberId)
+	return data
+}
diff --git a/exporter/util_workspace.go b/exporter/util_workspace.go
new file mode 100644
index 0000000000..0361100592
--- /dev/null
+++ b/exporter/util_workspace.go
@@ -0,0 +1,400 @@
+package exporter
+
+import (
+	"fmt"
+	"log"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/databricks/terraform-provider-databricks/common"
+	"github.com/databricks/terraform-provider-databricks/workspace"
+
+	"golang.org/x/exp/slices"
+
+	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
+)
+
+func isSupportedWorkspaceObject(obj workspace.ObjectStatus) bool {
+	switch obj.ObjectType {
+	case workspace.Directory, workspace.Notebook, workspace.File:
+		return true
+	}
+	return false
+}
+
+func (ic *importContext) emitRepoByPath(path string) {
+	// The path to Repos objects consists of the following parts: /Repos, folder, repository, and the path inside the repository.
+	// Because it starts with `/`, the split will produce an empty string as the first element of the slice.
+	// We also cap the split at 5 parts to avoid producing unnecessary trailing pieces.
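+	// For example, strings.SplitN("/Repos/user@domain.com/my-repo/dir/notebook", "/", 5) yields
+	// ["", "Repos", "user@domain.com", "my-repo", "dir/notebook"], and joining the first four
+	// parts reconstructs the repository path "/Repos/user@domain.com/my-repo".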
+ parts := strings.SplitN(path, "/", 5) + if len(parts) >= 4 { + ic.Emit(&resource{ + Resource: "databricks_repo", + Attribute: "path", + Value: strings.Join(parts[:4], "/"), + }) + } else { + log.Printf("[WARN] Incorrect Repos path") + } +} + +func (ic *importContext) emitWorkspaceFileOrRepo(path string) { + if strings.HasPrefix(path, "/Repos") { + ic.emitRepoByPath(path) + } else { + // TODO: wrap this into ic.shouldEmit... + // TODO: strip /Workspace prefix if it's provided + ic.Emit(&resource{ + Resource: "databricks_workspace_file", + ID: path, + }) + } +} + +func (ic *importContext) emitNotebookOrRepo(path string) { + if strings.HasPrefix(path, "/Repos") { + ic.emitRepoByPath(path) + } else { + // TODO: strip /Workspace prefix if it's provided + ic.maybeEmitWorkspaceObject("databricks_notebook", path, nil) + } +} + +func (ic *importContext) getAllDirectories() []workspace.ObjectStatus { + if len(ic.allDirectories) == 0 { + objects := ic.getAllWorkspaceObjects(nil) + ic.wsObjectsMutex.Lock() + defer ic.wsObjectsMutex.Unlock() + if len(ic.allDirectories) == 0 { + for _, v := range objects { + if v.ObjectType == workspace.Directory { + ic.allDirectories = append(ic.allDirectories, v) + } + } + } + } + return ic.allDirectories +} + +// TODO: Ignore databricks_automl as well? +var directoriesToIgnore = []string{".ide", ".bundle", "__pycache__"} + +// TODO: add ignoring directories of deleted users? This could potentially decrease the number of processed objects... +func excludeAuxiliaryDirectories(v workspace.ObjectStatus) bool { + if v.ObjectType != workspace.Directory { + return true + } + // TODO: rewrite to use suffix check, etc., instead of split and slice contains? + parts := strings.Split(v.Path, "/") + result := len(parts) > 1 && slices.Contains[[]string, string](directoriesToIgnore, parts[len(parts)-1]) + if result { + log.Printf("[DEBUG] Ignoring directory %s", v.Path) + } + return !result +} + +func (ic *importContext) getAllWorkspaceObjects(visitor func([]workspace.ObjectStatus)) []workspace.ObjectStatus { + ic.wsObjectsMutex.Lock() + defer ic.wsObjectsMutex.Unlock() + if len(ic.allWorkspaceObjects) == 0 { + t1 := time.Now() + log.Print("[INFO] Starting to list all workspace objects") + notebooksAPI := workspace.NewNotebooksAPI(ic.Context, ic.Client) + ic.allWorkspaceObjects, _ = ListParallel(notebooksAPI, "/", excludeAuxiliaryDirectories, visitor) + log.Printf("[INFO] Finished listing of all workspace objects. %d objects in total. %v seconds", + len(ic.allWorkspaceObjects), time.Since(t1).Seconds()) + } + return ic.allWorkspaceObjects +} + +func shouldOmitMd5Field(ic *importContext, pathString string, as *schema.Schema, d *schema.ResourceData) bool { + if pathString == "md5" { // `md5` is kind of computed, but not declared as it... 
+		return true
+	}
+	return defaultShouldOmitFieldFunc(ic, pathString, as, d)
+}
+
+func workspaceObjectResouceName(ic *importContext, d *schema.ResourceData) string {
+	name := d.Get("path").(string)
+	if name == "" {
+		return d.Id()
+	} else {
+		name = nameNormalizationRegex.ReplaceAllString(name[1:], "_") + "_" +
+			strconv.FormatInt(int64(d.Get("object_id").(int)), 10)
+	}
+	return name
+}
+
+func wsObjectGetModifiedAt(obs workspace.ObjectStatus) int64 {
+	if obs.ModifiedAtInteractive != nil && obs.ModifiedAtInteractive.TimeMillis != 0 {
+		return obs.ModifiedAtInteractive.TimeMillis
+	}
+	return obs.ModifiedAt
+}
+
+func (ic *importContext) shouldEmitForPath(path string) bool {
+	if !ic.exportDeletedUsersAssets && strings.HasPrefix(path, "/Users/") {
+		return ic.IsUserOrServicePrincipalDirectory(path, "/Users", false)
+	}
+	return true
+}
+
+func (ic *importContext) maybeEmitWorkspaceObject(resourceType, path string, obj *workspace.ObjectStatus) {
+	if ic.shouldEmitForPath(path) {
+		var data *schema.ResourceData
+		if obj != nil {
+			switch resourceType {
+			case "databricks_notebook":
+				data = workspace.ResourceNotebook().ToResource().TestResourceData()
+			case "databricks_workspace_file":
+				data = workspace.ResourceWorkspaceFile().ToResource().TestResourceData()
+			case "databricks_directory":
+				data = workspace.ResourceDirectory().ToResource().TestResourceData()
+			}
+			if data != nil {
+				scm := ic.Resources[resourceType].Schema
+				data.MarkNewResource()
+				data.SetId(path)
+				err := common.StructToData(obj, scm, data)
+				if err != nil {
+					log.Printf("[ERROR] can't convert %s object to data: %v. obj=%v", resourceType, err, obj)
+					data = nil
+				}
+			}
+		}
+		ic.Emit(&resource{
+			Resource:    resourceType,
+			ID:          path,
+			Data:        data,
+			Incremental: ic.incremental,
+		})
+	} else {
+		log.Printf("[WARN] Not emitting a workspace object %s for deleted user. Path='%s'", resourceType, path)
+		ic.addIgnoredResource(fmt.Sprintf("%s. path=%s", resourceType, path))
+	}
+}
+
+func (ic *importContext) shouldSkipWorkspaceObject(object workspace.ObjectStatus, updatedSinceMs int64) bool {
+	if ic.incremental && object.ObjectType == workspace.Directory {
+		return true
+	}
+	if !(object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) ||
+		strings.HasPrefix(object.Path, "/Repos") {
+		// log.Printf("[DEBUG] Skipping unsupported entry %v", object)
+		return true
+	}
+	if res := ignoreIdeFolderRegex.FindStringSubmatch(object.Path); res != nil {
+		return true
+	}
+	modifiedAt := wsObjectGetModifiedAt(object)
+	if ic.incremental && modifiedAt < updatedSinceMs {
+		p := ic.oldWorkspaceObjectMapping[object.ObjectID]
+		if p == "" || p == object.Path {
+			log.Printf("[DEBUG] skipping '%s' that was modified at %d (last active=%d)",
+				object.Path, modifiedAt, updatedSinceMs)
+			return true
+		}
+		log.Printf("[DEBUG] Different path for object %d. Old='%s', New='%s'", object.ObjectID, p, object.Path)
+	}
+	if !ic.MatchesName(object.Path) {
+		return true
+	}
+	return false
+}
+
+func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) {
+	// check the size of the default channel, and add delays if it has less than 20% capacity left.
+	// This way we don't need to increase the size of the default channel beyond its configured capacity.
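+	// e.g. with a handler channel sized at 100000 entries (an illustrative value), we start
+	// sleeping once more than ~80000 of them are occupied.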
+	defChannelSize := len(ic.defaultChannel)
+	if float64(defChannelSize) > float64(ic.defaultHanlerChannelSize)*0.8 {
+		log.Printf("[DEBUG] waiting a bit before emitting a resource because default channel is 80%% full (%d): %v",
+			defChannelSize, object)
+		time.Sleep(1 * time.Second)
+	}
+	switch object.ObjectType {
+	case workspace.Notebook:
+		ic.maybeEmitWorkspaceObject("databricks_notebook", object.Path, &object)
+	case workspace.File:
+		ic.maybeEmitWorkspaceObject("databricks_workspace_file", object.Path, &object)
+	case workspace.Directory:
+		ic.maybeEmitWorkspaceObject("databricks_directory", object.Path, &object)
+	default:
+		log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
+	}
+}
+
+func listNotebooksAndWorkspaceFiles(ic *importContext) error {
+	objectsChannel := make(chan workspace.ObjectStatus, defaultChannelSize)
+	numRoutines := 2 // TODO: make configurable? together with the channel size?
+	var processedObjects atomic.Uint64
+	for i := 0; i < numRoutines; i++ {
+		num := i
+		ic.waitGroup.Add(1)
+		go func() {
+			log.Printf("[DEBUG] Starting channel %d for workspace objects", num)
+			for object := range objectsChannel {
+				processedObjects.Add(1)
+				ic.waitGroup.Add(1)
+				emitWorkpaceObject(ic, object)
+				ic.waitGroup.Done()
+			}
+			log.Printf("[DEBUG] channel %d for workspace objects is finished", num)
+			ic.waitGroup.Done()
+		}()
+	}
+	// There are two use cases: either this call performs the listing itself (invoking the visitor
+	// below as a side effect), or the listing already happened and we just get the cached results.
+	updatedSinceMs := ic.getUpdatedSinceMs()
+	allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) {
+		for _, object := range objects {
+			if object.ObjectType == workspace.Directory {
+				if !ic.incremental && object.Path != "/" && ic.isServiceInListing("directories") {
+					objectsChannel <- object
+				}
+			} else {
+				if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
+					continue
+				}
+				object := object
+				switch object.ObjectType {
+				case workspace.Notebook, workspace.File:
+					objectsChannel <- object
+				default:
+					log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
+				}
+			}
+		}
+	})
+	close(objectsChannel)
+	log.Printf("[DEBUG] processedObjects=%d", processedObjects.Load())
+	if processedObjects.Load() == 0 { // the listing produced no side effects because it already happened earlier
+		log.Printf("[DEBUG] ic.getAllWorkspaceObjects already was called before, so we need to explicitly submit all objects")
+		for _, object := range allObjects {
+			if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
+				continue
+			}
+			if object.ObjectType == workspace.Directory && !ic.incremental && ic.isServiceInListing("directories") && object.Path != "/" {
+				emitWorkpaceObject(ic, object)
+			} else if (object.ObjectType == workspace.Notebook || object.ObjectType == workspace.File) && ic.isServiceInListing("notebooks") {
+				emitWorkpaceObject(ic, object)
+			}
+		}
+	}
+	return nil
+}
+
+// Parallel listing implementation
+type syncAnswer struct {
+	MU   sync.Mutex
+	data []workspace.ObjectStatus
+}
+
+func (a *syncAnswer) append(objs []workspace.ObjectStatus) {
+	a.MU.Lock()
+	a.data = append(a.data, objs...)
+ a.MU.Unlock() +} + +type directoryInfo struct { + Path string + Attempts int +} + +// constants related to the parallel listing +const ( + envVarListParallelism = "EXPORTER_WS_LIST_PARALLELISM" + envVarDirectoryChannelSize = "EXPORTER_DIRECTORIES_CHANNEL_SIZE" + defaultWorkersPoolSize = 10 + defaultDirectoryChannelSize = 100000 +) + +func recursiveAddPathsParallel(a workspace.NotebooksAPI, directory directoryInfo, dirChannel chan directoryInfo, + answer *syncAnswer, wg *sync.WaitGroup, shouldIncludeDir func(workspace.ObjectStatus) bool, visitor func([]workspace.ObjectStatus)) { + defer wg.Done() + notebookInfoList, err := a.ListInternalImpl(directory.Path) + if err != nil { + log.Printf("[WARN] error listing '%s': %v", directory.Path, err) + if isRetryableError(err.Error(), directory.Attempts) { + wg.Add(1) + log.Printf("[INFO] attempt %d of retrying listing of '%s' after error: %v", + directory.Attempts+1, directory.Path, err) + time.Sleep(time.Duration(retryDelaySeconds) * time.Second) + dirChannel <- directoryInfo{Path: directory.Path, Attempts: directory.Attempts + 1} + } + } + + newList := make([]workspace.ObjectStatus, 0, len(notebookInfoList)) + directories := make([]workspace.ObjectStatus, 0, len(notebookInfoList)) + for _, v := range notebookInfoList { + if v.ObjectType == workspace.Directory { + if shouldIncludeDir(v) { + newList = append(newList, v) + directories = append(directories, v) + } + } else { + newList = append(newList, v) + } + } + answer.append(newList) + for _, v := range directories { + wg.Add(1) + log.Printf("[DEBUG] putting directory '%s' into channel. Channel size: %d", v.Path, len(dirChannel)) + dirChannel <- directoryInfo{Path: v.Path} + time.Sleep(3 * time.Millisecond) + } + if visitor != nil { + visitor(newList) + } +} + +func ListParallel(a workspace.NotebooksAPI, path string, shouldIncludeDir func(workspace.ObjectStatus) bool, + visitor func([]workspace.ObjectStatus)) ([]workspace.ObjectStatus, error) { + var answer syncAnswer + wg := &sync.WaitGroup{} + + if shouldIncludeDir == nil { + shouldIncludeDir = func(workspace.ObjectStatus) bool { return true } + } + + numWorkers := getEnvAsInt(envVarListParallelism, defaultWorkersPoolSize) + channelSize := getEnvAsInt(envVarDirectoryChannelSize, defaultDirectoryChannelSize) + dirChannel := make(chan directoryInfo, channelSize) + for i := 0; i < numWorkers; i++ { + t := i + go func() { + log.Printf("[DEBUG] starting go routine %d", t) + for directory := range dirChannel { + log.Printf("[DEBUG] processing directory %s", directory.Path) + recursiveAddPathsParallel(a, directory, dirChannel, &answer, wg, shouldIncludeDir, visitor) + } + }() + + } + log.Print("[DEBUG] pushing initial path to channel") + wg.Add(1) + recursiveAddPathsParallel(a, directoryInfo{Path: path}, dirChannel, &answer, wg, shouldIncludeDir, visitor) + log.Print("[DEBUG] starting to wait") + wg.Wait() + log.Print("[DEBUG] closing the directory channel") + close(dirChannel) + + answer.MU.Lock() + defer answer.MU.Unlock() + return answer.data, nil +} + +func (ic *importContext) emitWorkspaceObjectParentDirectory(r *resource) { + if !ic.isServiceEnabled("directories") { + return + } + if idx := strings.LastIndex(r.ID, "/"); idx > 0 { // not found, or directly in the root... 
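+		// e.g. for r.ID "/Users/user@domain.com/project/notebook", the prefix before the last
+		// "/" ("/Users/user@domain.com/project") is emitted as the parent directory.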
+		directoryPath := r.ID[:idx]
+		ic.Emit(&resource{
+			Resource: "databricks_directory",
+			ID:       directoryPath,
+		})
+		r.AddExtraData(ParentDirectoryExtraKey, directoryPath)
+	}
+}

From f1dc449c373fc333d736927cfad1ff3e35665f46 Mon Sep 17 00:00:00 2001
From: Alex Ott
Date: Mon, 12 Aug 2024 11:40:21 +0200
Subject: [PATCH 07/10] [Exporter] Fix generation of `autotermination_minutes = 0` (#3881)

## Changes

The code that evaluated whether a field should be skipped was incorrectly omitting the zero value of the `autotermination_minutes` attribute.

## Tests
- [x] `make test` run locally
- [ ] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK

---
 exporter/exporter_test.go                          | 17 ++++++++++++++++-
 .../test-data/get-cluster-test2-response.json      |  1 -
 exporter/util_compute.go                           |  7 -------
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go
index cfdaea301e..a539209287 100644
--- a/exporter/exporter_test.go
+++ b/exporter/exporter_test.go
@@ -1029,6 +1029,22 @@ func TestImportingClusters(t *testing.T) {
 		err := ic.Run()
 		os.Unsetenv("EXPORTER_PARALLELISM_default")
 		assert.NoError(t, err)
+		content, err := os.ReadFile(tmpDir + "/compute.tf")
+		assert.NoError(t, err)
+		contentStr := string(content)
+		assert.True(t, strings.Contains(contentStr, `resource "databricks_cluster" "test1_test1"`))
+		assert.True(t, strings.Contains(contentStr, `resource "databricks_cluster" "test_cluster_policy_test2"`))
+		assert.True(t, strings.Contains(contentStr, `policy_id = databricks_cluster_policy.users_cluster_policy.id`))
+		assert.True(t, strings.Contains(contentStr, `autotermination_minutes = 0`))
+		assert.True(t, strings.Contains(contentStr, `autotermination_minutes = 120`))
+		assert.True(t, strings.Contains(contentStr, `library {
+    jar = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+  }`))
+		assert.True(t, strings.Contains(contentStr, `init_scripts {
+    dbfs {
+      destination = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+    }
+  }`))
 	})
 }

@@ -2898,7 +2914,6 @@ func TestNotificationDestinationExport(t *testing.T) {
 	content, err := os.ReadFile(tmpDir + "/settings.tf")
 	assert.NoError(t, err)
 	contentStr := string(content)
-	log.Printf("[DEBUG] contentStr: %s", contentStr)
 	assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "pagerdruty_456"`))
 	assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "teams_345"`))
 	assert.True(t, strings.Contains(contentStr, `resource "databricks_notification_destination" "email_123" {
diff --git a/exporter/test-data/get-cluster-test2-response.json b/exporter/test-data/get-cluster-test2-response.json
index 82643aeadb..8b190632a6 100644
--- a/exporter/test-data/get-cluster-test2-response.json
+++ b/exporter/test-data/get-cluster-test2-response.json
@@ -3,7 +3,6 @@
         "max_workers": 2,
         "min_workers": 1
     },
-    "autotermination_minutes": 10,
     "azure_attributes": {
         "availability": "ON_DEMAND_AZURE",
         "first_on_demand": 1,
diff --git a/exporter/util_compute.go b/exporter/util_compute.go
index 7967b9ebc9..4a42a0cf21 100644
--- a/exporter/util_compute.go
+++ b/exporter/util_compute.go
@@ -3,7 +3,6 @@ package exporter
 import (
 	"fmt"
 	"log"
-	"reflect"
 	"regexp"

 	"github.com/databricks/terraform-provider-databricks/common"
 	"github.com/databricks/terraform-provider-databricks/jobs"
@@ -188,12 +187,6 @@ func makeShouldOmitFieldForCluster(regex *regexp.Regexp) func(ic *importContext,
 			}
 		}
raw := d.Get(pathString) - if raw != nil { - v := reflect.ValueOf(raw) - if as.Optional && v.IsZero() { - return true - } - } workerInstPoolID := d.Get(prefix + "instance_pool_id").(string) switch pathString { case prefix + "node_type_id": From 9037ceec9bee13f0dcd3e204424af57b456cfc3e Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Tue, 13 Aug 2024 09:57:43 +0200 Subject: [PATCH 08/10] [Doc] Clarify setting of permissions for workspace objects (#3884) ## Changes Clarify that permissions on workspace objects (directories/files/notebooks) could be set either by path or by the object ID, and the import will use only object ID. This should help with issues like #3882 ## Tests - [ ] `make test` run locally - [x] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [ ] using Go SDK --- docs/resources/permissions.md | 77 +++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/docs/resources/permissions.md b/docs/resources/permissions.md index fcd7b69b39..2070731c7e 100644 --- a/docs/resources/permissions.md +++ b/docs/resources/permissions.md @@ -291,6 +291,8 @@ resource "databricks_permissions" "dlt_usage" { Valid [permission levels](https://docs.databricks.com/security/access-control/workspace-acl.html#notebook-permissions) for [databricks_notebook](notebook.md) are: `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, and `CAN_MANAGE`. +A notebook could be specified by using either `notebook_path` or `notebook_id` attribute. The value for the `notebook_id` is the object ID of the resource in the Databricks Workspace that is exposed as `object_id` attribute of the `databricks_notebook` resource as shown below. + ```hcl resource "databricks_group" "auto" { display_name = "Automation" @@ -306,7 +308,7 @@ resource "databricks_notebook" "this" { language = "PYTHON" } -resource "databricks_permissions" "notebook_usage" { +resource "databricks_permissions" "notebook_usage_by_path" { notebook_path = databricks_notebook.this.path access_control { @@ -324,12 +326,35 @@ resource "databricks_permissions" "notebook_usage" { permission_level = "CAN_EDIT" } } + +resource "databricks_permissions" "notebook_usage_by_id" { + notebook_id = databricks_notebook.this.object_id + + access_control { + group_name = "users" + permission_level = "CAN_READ" + } + + access_control { + group_name = databricks_group.auto.display_name + permission_level = "CAN_RUN" + } + + access_control { + group_name = databricks_group.eng.display_name + permission_level = "CAN_EDIT" + } +} ``` +-> **Note**: when importing a permissions resource, only the `notebook_id` is filled! + ## Workspace file usage Valid permission levels for [databricks_workspace_file](workspace_file.md) are: `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, and `CAN_MANAGE`. +A workspace file could be specified by using either `workspace_file_path` or `workspace_file_id` attribute. The value for the `workspace_file_id` is the object ID of the resource in the Databricks Workspace that is exposed as `object_id` attribute of the `databricks_workspace_file` resource as shown below. 
+ ```hcl resource "databricks_group" "auto" { display_name = "Automation" @@ -344,7 +369,7 @@ resource "databricks_workspace_file" "this" { path = "/Production/ETL/Features.py" } -resource "databricks_permissions" "workspace_file_usage" { +resource "databricks_permissions" "workspace_file_usage_by_path" { workspace_file_path = databricks_workspace_file.this.path access_control { @@ -362,8 +387,29 @@ resource "databricks_permissions" "workspace_file_usage" { permission_level = "CAN_EDIT" } } + +resource "databricks_permissions" "workspace_file_usage_by_id" { + workspace_file_id = databricks_workspace_file.this.object_id + + access_control { + group_name = "users" + permission_level = "CAN_READ" + } + + access_control { + group_name = databricks_group.auto.display_name + permission_level = "CAN_RUN" + } + + access_control { + group_name = databricks_group.eng.display_name + permission_level = "CAN_EDIT" + } +} ``` +-> **Note**: when importing a permissions resource, only the `workspace_file_id` is filled! + ## Folder usage Valid [permission levels](https://docs.databricks.com/security/access-control/workspace-acl.html#folder-permissions) for folders of [databricks_directory](directory.md) are: `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, and `CAN_MANAGE`. Notebooks and experiments in a folder inherit all permissions settings of that folder. For example, a user (or service principal) that has `CAN_RUN` permission on a folder has `CAN_RUN` permission on the notebooks in that folder. @@ -373,6 +419,9 @@ Valid [permission levels](https://docs.databricks.com/security/access-control/wo - All users (or service principals) have `CAN_MANAGE` permission for objects the user creates. - User home directory - The user (or service principal) has `CAN_MANAGE` permission. All other users (or service principals) can list their directories. +A folder could be specified by using either `directory_path` or `directory_id` attribute. The value for the `directory_id` is the object ID of the resource in the Databricks Workspace that is exposed as `object_id` attribute of the `databricks_directory` resource as shown below. + + ```hcl resource "databricks_group" "auto" { display_name = "Automation" @@ -386,9 +435,27 @@ resource "databricks_directory" "this" { path = "/Production/ETL" } -resource "databricks_permissions" "folder_usage" { +resource "databricks_permissions" "folder_usage_by_path" { directory_path = databricks_directory.this.path - depends_on = [databricks_directory.this] + + access_control { + group_name = "users" + permission_level = "CAN_READ" + } + + access_control { + group_name = databricks_group.auto.display_name + permission_level = "CAN_RUN" + } + + access_control { + group_name = databricks_group.eng.display_name + permission_level = "CAN_EDIT" + } +} + +resource "databricks_permissions" "folder_usage_by_id" { + directory_id = databricks_directory.this.object_id access_control { group_name = "users" @@ -407,6 +474,8 @@ resource "databricks_permissions" "folder_usage" { } ``` +-> **Note**: when importing a permissions resource, only the `directory_id` is filled! + ## Repos usage Valid [permission levels](https://docs.databricks.com/security/access-control/workspace-acl.html) for [databricks_repo](repo.md) are: `CAN_READ`, `CAN_RUN`, `CAN_EDIT`, and `CAN_MANAGE`. 
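Since imports populate only the object ID attributes, configurations that reference workspace objects by ID round-trip cleanly through a state import. As a hypothetical illustration (the object ID `1234567890` is made up, and the `/directories/<object id>` import ID form is an assumption based on how this resource addresses folders; this uses the Terraform 1.5+ `import` block syntax):

```hcl
# Illustrative only: adopt existing folder permissions by the directory's object ID.
import {
  to = databricks_permissions.folder_usage_by_id
  id = "/directories/1234567890"
}
```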
From 9490aa8c1ad7ae2d053f8d8a7270e5068948d9ba Mon Sep 17 00:00:00 2001
From: Alex Ott
Date: Tue, 13 Aug 2024 09:57:54 +0200
Subject: [PATCH 09/10] [Fix] Fix crash when destroying `databricks_compliance_security_profile_workspace_setting` (#3883)

## Changes

The `databricks_compliance_security_profile_workspace_setting` resource didn't have a `deleteFunc` defined, which led to a crash when destroying the resource. This PR fixes the issue in two places:

1. Adding an explicit `deleteFunc` that just prints a warning into the log
1. Checking that `deleteFunc` is not null in the `workspaceSetting` and `accountSetting` implementations to avoid similar problems in the future

Fixes #3675

## Tests
- [x] `make test` run locally
- [x] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK

---
 .../compliance_security_profile_setting.md     |  2 ++
 settings/generic_setting.go                    |  9 +++++++++
 ...urce_compliance_security_profile_setting.go |  5 +++++
 ...compliance_security_profile_setting_test.go | 18 ++++++++++++++++++
 4 files changed, 34 insertions(+)

diff --git a/docs/resources/compliance_security_profile_setting.md b/docs/resources/compliance_security_profile_setting.md
index 524f5673a2..0794ace4f6 100644
--- a/docs/resources/compliance_security_profile_setting.md
+++ b/docs/resources/compliance_security_profile_setting.md
@@ -6,6 +6,8 @@ subcategory: "Settings"

 -> **Note** This resource could be only used with workspace-level provider!

+-> **Note** This setting can NOT be disabled once it is enabled.
+
 The `databricks_compliance_security_profile_workspace_setting` resource allows you to control whether to enable the
 compliance security profile for the current workspace. Enabling it on a workspace is permanent. By default, it is
 turned off. This setting can NOT be disabled once it is enabled.
diff --git a/settings/generic_setting.go b/settings/generic_setting.go
index c850758932..4f81123e03 100644
--- a/settings/generic_setting.go
+++ b/settings/generic_setting.go
@@ -9,6 +9,7 @@ import (
 	"github.com/databricks/databricks-sdk-go"
 	"github.com/databricks/databricks-sdk-go/apierr"
 	"github.com/databricks/terraform-provider-databricks/common"
+	"github.com/hashicorp/terraform-plugin-log/tflog"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
 )

@@ -133,6 +134,10 @@ func (w workspaceSetting[T]) Update(ctx context.Context, c *databricks.Workspace
 	return w.updateFunc(ctx, c, t)
 }
 func (w workspaceSetting[T]) Delete(ctx context.Context, c *databricks.WorkspaceClient, etag string) (string, error) {
+	if w.deleteFunc == nil {
+		tflog.Warn(ctx, "The `delete` function isn't defined for this resource. Most probably it's not supported.")
+		return etag, nil
+	}
 	return w.deleteFunc(ctx, c, etag)
 }
 func (w workspaceSetting[T]) GetETag(t *T) string {
@@ -203,6 +208,10 @@ func (w accountSetting[T]) Update(ctx context.Context, c *databricks.AccountClie
 	return w.updateFunc(ctx, c, t)
 }
 func (w accountSetting[T]) Delete(ctx context.Context, c *databricks.AccountClient, etag string) (string, error) {
+	if w.deleteFunc == nil {
+		tflog.Warn(ctx, "The `delete` function isn't defined for this resource. Most probably it's not supported.")
+		return etag, nil
+	}
 	return w.deleteFunc(ctx, c, etag)
 }
 func (w accountSetting[T]) GetETag(t *T) string {
diff --git a/settings/resource_compliance_security_profile_setting.go b/settings/resource_compliance_security_profile_setting.go
index 5ab07ddb86..779bf78708 100644
--- a/settings/resource_compliance_security_profile_setting.go
+++ b/settings/resource_compliance_security_profile_setting.go
@@ -6,6 +6,7 @@ import (

 	"github.com/databricks/databricks-sdk-go"
 	"github.com/databricks/databricks-sdk-go/service/settings"
+	"github.com/hashicorp/terraform-plugin-log/tflog"
 )

 // Enhanced Security Monitoring setting
@@ -32,4 +33,8 @@ var complianceSecurityProfileSetting = workspaceSetting[settings.ComplianceSecur
 		}
 		return res.Etag, err
 	},
+	deleteFunc: func(ctx context.Context, w *databricks.WorkspaceClient, etag string) (string, error) {
+		tflog.Warn(ctx, "databricks_compliance_security_profile_workspace_setting couldn't be disabled!")
+		return etag, nil
+	},
 }
diff --git a/settings/resource_compliance_security_profile_setting_test.go b/settings/resource_compliance_security_profile_setting_test.go
index ec7da734e9..2abd08ca05 100644
--- a/settings/resource_compliance_security_profile_setting_test.go
+++ b/settings/resource_compliance_security_profile_setting_test.go
@@ -256,3 +256,21 @@ func TestQueryUpdateComplianceSecurityProfileSettingWithConflict(t *testing.T) {
 	assert.Equal(t, true, res["is_enabled"])
 	assert.Equal(t, "HIPAA", res["compliance_standards"].([]interface{})[0])
 }
+
+func TestDeleteComplianceSecurityProfileSetting(t *testing.T) {
+	qa.ResourceFixture{
+		Resource: testComplianceSecurityProfileSetting,
+		Delete:   true,
+		HCL: `
+		compliance_security_profile_workspace {
+			is_enabled           = true
+			compliance_standards = ["HIPAA", "PCI_DSS"]
+		}
+		etag = "etag1"
+		`,
+		ID: defaultSettingId,
+	}.ApplyAndExpectData(t, map[string]any{
+		etagAttrName: "etag1",
+		"id":         defaultSettingId,
+	})
+}

From 81be59161f3298b0e3803de8dbb4a40fd10164fa Mon Sep 17 00:00:00 2001
From: hectorcast-db
Date: Tue, 13 Aug 2024 16:05:37 +0200
Subject: [PATCH 10/10] [Fix] Retry cluster update on "INVALID_STATE" (#3890)

## Changes

Clusters can only be updated while in the Running or Terminated state. This causes TF to fail to update autoscaling clusters if there is an ongoing resize.
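In essence, the fix wraps the cluster `Edit` call in a retry loop that treats `INVALID_STATE` as retryable. A condensed sketch of the approach (the full change is in the diff below):

```go
// Retry edits that fail with INVALID_STATE: autoscaling clusters reject
// modifications while a resize is still in flight, so we back off and retry
// for up to 15 minutes instead of failing the apply immediately.
err = retry.RetryContext(ctx, 15*time.Minute, func() *retry.RetryError {
	_, err = clusters.Edit(ctx, cluster)
	if err == nil {
		return nil
	}
	var apiErr *apierr.APIError
	if errors.As(err, &apiErr) && apiErr.ErrorCode == "INVALID_STATE" {
		return retry.RetryableError(fmt.Errorf("cluster %s cannot be modified in its current state", clusterId))
	}
	return retry.NonRetryableError(err)
})
```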
## Tests - [X] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [X] using Go SDK --- clusters/resource_cluster.go | 19 +++++- clusters/resource_cluster_test.go | 96 +++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/clusters/resource_cluster.go b/clusters/resource_cluster.go index 0948185d5c..1d31d931e5 100644 --- a/clusters/resource_cluster.go +++ b/clusters/resource_cluster.go @@ -2,15 +2,18 @@ package clusters import ( "context" + "errors" "fmt" "log" "strings" "time" "github.com/hashicorp/go-cty/cty" + "github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema" "github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/terraform-provider-databricks/common" "github.com/databricks/terraform-provider-databricks/libraries" @@ -604,7 +607,21 @@ func resourceClusterUpdate(ctx context.Context, d *schema.ResourceData, c *commo return err } cluster.ForceSendFields = []string{"NumWorkers"} - _, err = clusters.Edit(ctx, cluster) + + err = retry.RetryContext(ctx, 15*time.Minute, func() *retry.RetryError { + _, err = clusters.Edit(ctx, cluster) + if err == nil { + return nil + } + var apiErr *apierr.APIError + // Only Running and Terminated clusters can be modified. In particular, autoscaling clusters cannot be modified + // while the resizing is ongoing. We retry in this case. Scaling can take several minutes. + if errors.As(err, &apiErr) && apiErr.ErrorCode == "INVALID_STATE" { + return retry.RetryableError(fmt.Errorf("cluster %s cannot be modified in its current state", clusterId)) + } + return retry.NonRetryableError(err) + }) + } if err != nil { return err diff --git a/clusters/resource_cluster_test.go b/clusters/resource_cluster_test.go index 40f8a2db62..53c693810a 100644 --- a/clusters/resource_cluster_test.go +++ b/clusters/resource_cluster_test.go @@ -965,6 +965,102 @@ func TestResourceClusterUpdate(t *testing.T) { assert.Equal(t, "abc", d.Id(), "Id should be the same as in reading") } +func TestResourceClusterUpdate_WhileScaling(t *testing.T) { + d, err := qa.ResourceFixture{ + Fixtures: []qa.HTTPFixture{ + { + Method: "GET", + Resource: "/api/2.1/clusters/get?cluster_id=abc", + ReuseRequest: true, + Response: compute.ClusterDetails{ + ClusterId: "abc", + NumWorkers: 100, + ClusterName: "Shared Autoscaling", + SparkVersion: "7.1-scala12", + NodeTypeId: "i3.xlarge", + AutoterminationMinutes: 15, + State: compute.StateRunning, + }, + }, + { + Method: "POST", + Resource: "/api/2.1/clusters/events", + ExpectedRequest: compute.GetEvents{ + ClusterId: "abc", + Limit: 1, + Order: compute.GetEventsOrderDesc, + EventTypes: []compute.EventType{compute.EventTypePinned, compute.EventTypeUnpinned}, + }, + Response: compute.GetEventsResponse{ + Events: []compute.ClusterEvent{}, + TotalCount: 0, + }, + }, + { + Method: "POST", + Resource: "/api/2.1/clusters/start", + ExpectedRequest: compute.StartCluster{ + ClusterId: "abc", + }, + }, + { + Method: "GET", + Resource: "/api/2.0/libraries/cluster-status?cluster_id=abc", + Response: compute.ClusterLibraryStatuses{ + LibraryStatuses: []compute.LibraryFullStatus{}, + }, + }, + { + Method: "POST", + Resource: "/api/2.1/clusters/edit", + ExpectedRequest: compute.ClusterDetails{ + 
AutoterminationMinutes: 15, + ClusterId: "abc", + NumWorkers: 100, + ClusterName: "Shared Autoscaling", + SparkVersion: "7.1-scala12", + NodeTypeId: "i3.xlarge", + }, + Response: common.APIErrorBody{ + ErrorCode: "INVALID_STATE", + }, + Status: 404, + }, + { + Method: "POST", + Resource: "/api/2.1/clusters/edit", + ExpectedRequest: compute.ClusterDetails{ + AutoterminationMinutes: 15, + ClusterId: "abc", + NumWorkers: 100, + ClusterName: "Shared Autoscaling", + SparkVersion: "7.1-scala12", + NodeTypeId: "i3.xlarge", + }, + }, + { + Method: "GET", + Resource: "/api/2.0/libraries/cluster-status?cluster_id=abc", + Response: compute.ClusterLibraryStatuses{ + LibraryStatuses: []compute.LibraryFullStatus{}, + }, + }, + }, + ID: "abc", + Update: true, + Resource: ResourceCluster(), + State: map[string]any{ + "autotermination_minutes": 15, + "cluster_name": "Shared Autoscaling", + "spark_version": "7.1-scala12", + "node_type_id": "i3.xlarge", + "num_workers": 100, + }, + }.Apply(t) + assert.NoError(t, err) + assert.Equal(t, "abc", d.Id(), "Id should be the same as in reading") +} + func TestResourceClusterUpdateWithPinned(t *testing.T) { d, err := qa.ResourceFixture{ Fixtures: []qa.HTTPFixture{