From 1e4446689958ba7c6c14412e3d5b819658b52b2f Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Mon, 15 Apr 2024 18:54:14 +0200 Subject: [PATCH] Exporter: decrease a need for `get-status` calls for directories when listing Also, rework handling of longest prefix matches a bit --- exporter/context.go | 19 ++++++++++++++++--- exporter/importables.go | 27 ++++++++------------------- exporter/model.go | 2 ++ exporter/util.go | 38 ++++++++++++++++++++++++++++---------- exporter/util_test.go | 17 +++++++++++++++++ 5 files changed, 71 insertions(+), 32 deletions(-) diff --git a/exporter/context.go b/exporter/context.go index 49fcb1ea62..08591457ad 100644 --- a/exporter/context.go +++ b/exporter/context.go @@ -1250,7 +1250,6 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r !ic.isIgnoredResourceApproximation(sr) { log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", ref.Resource, attr, value, ref, sr.Type, sr.Name) - // TODO: we need to not generate traversals resources for which their Ignore function returns true... return matchValue, genTraversalTokens(sr, attr), sr.Mode == "data" } if ref.MatchType != MatchCaseInsensitive { // for case-insensitive matching we'll try iteration @@ -1258,6 +1257,21 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r ref.Resource, attr, value, ref) return "", nil, false } + } else if ref.MatchType == MatchLongestPrefix && ref.ExtraLookupKey != "" { + // log.Printf("[DEBUG] Searching longest prefix for reference by key %s for resource %s, attr='%s', value='%s', ref=%v", + // ref.ExtraLookupKey, ref.Resource, attr, value, ref) + extraKeyValue, exists := origResource.GetExtraData(ref.ExtraLookupKey) + if exists && extraKeyValue.(string) != "" { + sr := ic.State.Get(ref.Resource, attr, extraKeyValue.(string)) + // log.Printf("[DEBUG] Found %s for resource %s, attr='%s', value='%s', ref=%v. Found: %v", + // ref.ExtraLookupKey, ref.Resource, attr, parentDir, ref, sr) + if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) && + !ic.isIgnoredResourceApproximation(sr) { + log.Printf("[DEBUG] Finished direct lookup by key %s for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", + ref.ExtraLookupKey, ref.Resource, attr, value, ref, sr.Type, sr.Name) + return extraKeyValue.(string), genTraversalTokens(sr, attr), sr.Mode == "data" + } + } } maxPrefixLen := 0 @@ -1275,7 +1289,7 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r origValue := strValue if ref.SearchValueTransformFunc != nil { strValue = ref.SearchValueTransformFunc(strValue) - log.Printf("[DEBUG] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue) + log.Printf("[TRACE] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue) } matched := false switch ref.MatchType { @@ -1298,7 +1312,6 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r ic.isIgnoredResourceApproximation(sr) { continue } - // TODO: we need to not generate traversals resources for which their Ignore function returns true... log.Printf("[DEBUG] Finished searching for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s", ref.Resource, attr, value, ref, sr.Type, sr.Name) return origValue, genTraversalTokens(sr, attr), sr.Mode == "data" diff --git a/exporter/importables.go b/exporter/importables.go index 1cc2005a15..dccf480733 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -79,6 +79,7 @@ var ( "storage_credential": {`CREATE_EXTERNAL_LOCATION`, `CREATE_EXTERNAL_TABLE`}, "foreign_connection": {`CREATE_FOREIGN_CATALOG`}, } + ParentDirectoryExtraKey = "parent_directory" ) func generateMountBody(ic *importContext, body *hclwrite.Body, r *resource) error { @@ -1526,20 +1527,14 @@ var resourcesMap map[string]importable = map[string]importable{ "notebook_"+ic.Importables["databricks_notebook"].Name(ic, r.Data)) // TODO: it's not completely correct condition - we need to make emit smarter - // emit only if permissions are different from their parent's permission. - if idx := strings.LastIndex(r.ID, "/"); idx != -1 { - directoryPath := r.ID[:idx] - ic.Emit(&resource{ - Resource: "databricks_directory", - ID: directoryPath, - }) - } + ic.emitWorkspaceObjectParentDirectory(r) return r.Data.Set("source", fileName) }, ShouldOmitField: shouldOmitMd5Field, Depends: []reference{ {Path: "source", File: true}, - {Path: "path", Resource: "databricks_directory", - MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + {Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix, + SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey}, {Path: "path", Resource: "databricks_user", Match: "home", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "path", Resource: "databricks_service_principal", Match: "home", @@ -1579,21 +1574,15 @@ var resourcesMap map[string]importable = map[string]importable{ // TODO: it's not completely correct condition - we need to make emit smarter - // emit only if permissions are different from their parent's permission. - if idx := strings.LastIndex(r.ID, "/"); idx != -1 { - directoryPath := r.ID[:idx] - ic.Emit(&resource{ - Resource: "databricks_directory", - ID: directoryPath, - }) - } - log.Printf("Creating %s for %s", fileName, r) + ic.emitWorkspaceObjectParentDirectory(r) + log.Printf("[TRACE] Creating %s for %s", fileName, r) return r.Data.Set("source", fileName) }, ShouldOmitField: shouldOmitMd5Field, Depends: []reference{ {Path: "source", File: true}, - {Path: "path", Resource: "databricks_directory", - MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + {Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix, + SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey}, {Path: "path", Resource: "databricks_user", Match: "home", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "path", Resource: "databricks_service_principal", Match: "home", diff --git a/exporter/model.go b/exporter/model.go index b05f426e46..2a9b2852fa 100644 --- a/exporter/model.go +++ b/exporter/model.go @@ -203,6 +203,8 @@ type reference struct { IsValidApproximation isValidAproximationFunc // if we should skip direct lookups (for example, we need it for UC schemas matching) SkipDirectLookup bool + // Extra Lookup key - if we need to search for the resource in a different way + ExtraLookupKey string } func (r reference) MatchAttribute() string { diff --git a/exporter/util.go b/exporter/util.go index 7c542a5746..992606fe05 100644 --- a/exporter/util.go +++ b/exporter/util.go @@ -1123,6 +1123,8 @@ func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) { ic.maybeEmitWorkspaceObject("databricks_notebook", object.Path, &object) case workspace.File: ic.maybeEmitWorkspaceObject("databricks_workspace_file", object.Path, &object) + case workspace.Directory: + ic.maybeEmitWorkspaceObject("databricks_directory", object.Path, &object) default: log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path) } @@ -1140,8 +1142,6 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error { for object := range objectsChannel { processedObjects.Add(1) ic.waitGroup.Add(1) - // log.Printf("[DEBUG] channel %d for workspace objects, channel size=%d got %v", - // num, len(objectsChannel), object) emitWorkpaceObject(ic, object) ic.waitGroup.Done() } @@ -1153,15 +1153,19 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error { updatedSinceMs := ic.getUpdatedSinceMs() allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) { for _, object := range objects { - if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) { - continue - } - object := object - switch object.ObjectType { - case workspace.Notebook, workspace.File: + if object.ObjectType == workspace.Directory && object.Path != "/" && !ic.incremental { objectsChannel <- object - default: - log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path) + } else { + if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) { + continue + } + object := object + switch object.ObjectType { + case workspace.Notebook, workspace.File: + objectsChannel <- object + default: + log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path) + } } } }) @@ -1459,3 +1463,17 @@ func (ic *importContext) emitPermissionsIfNotIgnored(r *resource, id, name strin } } } + +func (ic *importContext) emitWorkspaceObjectParentDirectory(r *resource) { + if !ic.isServiceEnabled("directories") { + return + } + if idx := strings.LastIndex(r.ID, "/"); idx > 0 { // not found, or directly in the root... + directoryPath := r.ID[:idx] + ic.Emit(&resource{ + Resource: "databricks_directory", + ID: directoryPath, + }) + r.AddExtraData(ParentDirectoryExtraKey, directoryPath) + } +} diff --git a/exporter/util_test.go b/exporter/util_test.go index d658970196..90dee5c96a 100644 --- a/exporter/util_test.go +++ b/exporter/util_test.go @@ -412,3 +412,20 @@ func TestIgnoreObjectWithEmptyName(t *testing.T) { assert.False(t, ignoreFunc(ic, r)) assert.Equal(t, 1, len(ic.ignoredResources)) } + +func TestEmitWorkspaceObjectParentDirectory(t *testing.T) { + ic := importContextForTest() + ic.enableServices("notebooks,directories") + dirPath := "/Shared" + r := &resource{ + ID: "/Shared/abc", + Resource: "databricks_notebook", + } + ic.emitWorkspaceObjectParentDirectory(r) + assert.Equal(t, 1, len(ic.testEmits)) + assert.True(t, ic.testEmits["databricks_directory[] (id: /Shared)"]) + + dir, exists := r.GetExtraData(ParentDirectoryExtraKey) + assert.True(t, exists) + assert.Equal(t, dirPath, dir) +}