Skip to content

Commit

Permalink
Exporter: decrease a need for get-status calls for directories when…
Browse files Browse the repository at this point in the history
… listing workspace objects (#3470)

* Exporter: decrease a need for `get-status` calls for directories when listing

Also, rework handling of longest prefix matches a bit

* remove commented out code

---------

Co-authored-by: Tanmay Rustagi <88379306+tanmay-db@users.noreply.github.com>
  • Loading branch information
alexott and tanmay-db authored Apr 16, 2024
1 parent b2286ca commit 160f38e
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 32 deletions.
15 changes: 12 additions & 3 deletions exporter/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -1250,14 +1250,24 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
!ic.isIgnoredResourceApproximation(sr) {
log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
ref.Resource, attr, value, ref, sr.Type, sr.Name)
// TODO: we need to not generate traversals resources for which their Ignore function returns true...
return matchValue, genTraversalTokens(sr, attr), sr.Mode == "data"
}
if ref.MatchType != MatchCaseInsensitive { // for case-insensitive matching we'll try iteration
log.Printf("[DEBUG] Finished direct lookup for reference for resource %s, attr='%s', value='%s', ref=%v. Not found",
ref.Resource, attr, value, ref)
return "", nil, false
}
} else if ref.MatchType == MatchLongestPrefix && ref.ExtraLookupKey != "" {
extraKeyValue, exists := origResource.GetExtraData(ref.ExtraLookupKey)
if exists && extraKeyValue.(string) != "" {
sr := ic.State.Get(ref.Resource, attr, extraKeyValue.(string))
if sr != nil && (ref.IsValidApproximation == nil || ref.IsValidApproximation(ic, origResource, sr, origPath)) &&
!ic.isIgnoredResourceApproximation(sr) {
log.Printf("[DEBUG] Finished direct lookup by key %s for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
ref.ExtraLookupKey, ref.Resource, attr, value, ref, sr.Type, sr.Name)
return extraKeyValue.(string), genTraversalTokens(sr, attr), sr.Mode == "data"
}
}
}

maxPrefixLen := 0
Expand All @@ -1275,7 +1285,7 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
origValue := strValue
if ref.SearchValueTransformFunc != nil {
strValue = ref.SearchValueTransformFunc(strValue)
log.Printf("[DEBUG] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue)
log.Printf("[TRACE] Resource %s. Transformed value from '%s' to '%s'", ref.Resource, origValue, strValue)
}
matched := false
switch ref.MatchType {
Expand All @@ -1298,7 +1308,6 @@ func (ic *importContext) Find(value, attr string, ref reference, origResource *r
ic.isIgnoredResourceApproximation(sr) {
continue
}
// TODO: we need to not generate traversals resources for which their Ignore function returns true...
log.Printf("[DEBUG] Finished searching for reference for resource %s, attr='%s', value='%s', ref=%v. Found: type=%s name=%s",
ref.Resource, attr, value, ref, sr.Type, sr.Name)
return origValue, genTraversalTokens(sr, attr), sr.Mode == "data"
Expand Down
27 changes: 8 additions & 19 deletions exporter/importables.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ var (
"storage_credential": {`CREATE_EXTERNAL_LOCATION`, `CREATE_EXTERNAL_TABLE`},
"foreign_connection": {`CREATE_FOREIGN_CATALOG`},
}
ParentDirectoryExtraKey = "parent_directory"
)

func generateMountBody(ic *importContext, body *hclwrite.Body, r *resource) error {
Expand Down Expand Up @@ -1527,20 +1528,14 @@ var resourcesMap map[string]importable = map[string]importable{
"notebook_"+ic.Importables["databricks_notebook"].Name(ic, r.Data))
// TODO: it's not completely correct condition - we need to make emit smarter -
// emit only if permissions are different from their parent's permission.
if idx := strings.LastIndex(r.ID, "/"); idx != -1 {
directoryPath := r.ID[:idx]
ic.Emit(&resource{
Resource: "databricks_directory",
ID: directoryPath,
})
}
ic.emitWorkspaceObjectParentDirectory(r)
return r.Data.Set("source", fileName)
},
ShouldOmitField: shouldOmitMd5Field,
Depends: []reference{
{Path: "source", File: true},
{Path: "path", Resource: "databricks_directory",
MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix,
SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey},
{Path: "path", Resource: "databricks_user", Match: "home",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_service_principal", Match: "home",
Expand Down Expand Up @@ -1580,21 +1575,15 @@ var resourcesMap map[string]importable = map[string]importable{

// TODO: it's not completely correct condition - we need to make emit smarter -
// emit only if permissions are different from their parent's permission.
if idx := strings.LastIndex(r.ID, "/"); idx != -1 {
directoryPath := r.ID[:idx]
ic.Emit(&resource{
Resource: "databricks_directory",
ID: directoryPath,
})
}
log.Printf("Creating %s for %s", fileName, r)
ic.emitWorkspaceObjectParentDirectory(r)
log.Printf("[TRACE] Creating %s for %s", fileName, r)
return r.Data.Set("source", fileName)
},
ShouldOmitField: shouldOmitMd5Field,
Depends: []reference{
{Path: "source", File: true},
{Path: "path", Resource: "databricks_directory",
MatchType: MatchLongestPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_directory", MatchType: MatchLongestPrefix,
SearchValueTransformFunc: appendEndingSlashToDirName, ExtraLookupKey: ParentDirectoryExtraKey},
{Path: "path", Resource: "databricks_user", Match: "home",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "path", Resource: "databricks_service_principal", Match: "home",
Expand Down
2 changes: 2 additions & 0 deletions exporter/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ type reference struct {
IsValidApproximation isValidAproximationFunc
// if we should skip direct lookups (for example, we need it for UC schemas matching)
SkipDirectLookup bool
// Extra Lookup key - if we need to search for the resource in a different way
ExtraLookupKey string
}

func (r reference) MatchAttribute() string {
Expand Down
38 changes: 28 additions & 10 deletions exporter/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,8 @@ func emitWorkpaceObject(ic *importContext, object workspace.ObjectStatus) {
ic.maybeEmitWorkspaceObject("databricks_notebook", object.Path, &object)
case workspace.File:
ic.maybeEmitWorkspaceObject("databricks_workspace_file", object.Path, &object)
case workspace.Directory:
ic.maybeEmitWorkspaceObject("databricks_directory", object.Path, &object)
default:
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
}
Expand All @@ -1140,8 +1142,6 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
for object := range objectsChannel {
processedObjects.Add(1)
ic.waitGroup.Add(1)
// log.Printf("[DEBUG] channel %d for workspace objects, channel size=%d got %v",
// num, len(objectsChannel), object)
emitWorkpaceObject(ic, object)
ic.waitGroup.Done()
}
Expand All @@ -1153,15 +1153,19 @@ func listNotebooksAndWorkspaceFiles(ic *importContext) error {
updatedSinceMs := ic.getUpdatedSinceMs()
allObjects := ic.getAllWorkspaceObjects(func(objects []workspace.ObjectStatus) {
for _, object := range objects {
if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
continue
}
object := object
switch object.ObjectType {
case workspace.Notebook, workspace.File:
if object.ObjectType == workspace.Directory && object.Path != "/" && !ic.incremental {
objectsChannel <- object
default:
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
} else {
if ic.shouldSkipWorkspaceObject(object, updatedSinceMs) {
continue
}
object := object
switch object.ObjectType {
case workspace.Notebook, workspace.File:
objectsChannel <- object
default:
log.Printf("[WARN] unknown type %s for path %s", object.ObjectType, object.Path)
}
}
}
})
Expand Down Expand Up @@ -1459,3 +1463,17 @@ func (ic *importContext) emitPermissionsIfNotIgnored(r *resource, id, name strin
}
}
}

func (ic *importContext) emitWorkspaceObjectParentDirectory(r *resource) {
if !ic.isServiceEnabled("directories") {
return
}
if idx := strings.LastIndex(r.ID, "/"); idx > 0 { // not found, or directly in the root...
directoryPath := r.ID[:idx]
ic.Emit(&resource{
Resource: "databricks_directory",
ID: directoryPath,
})
r.AddExtraData(ParentDirectoryExtraKey, directoryPath)
}
}
17 changes: 17 additions & 0 deletions exporter/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,3 +412,20 @@ func TestIgnoreObjectWithEmptyName(t *testing.T) {
assert.False(t, ignoreFunc(ic, r))
assert.Equal(t, 1, len(ic.ignoredResources))
}

func TestEmitWorkspaceObjectParentDirectory(t *testing.T) {
ic := importContextForTest()
ic.enableServices("notebooks,directories")
dirPath := "/Shared"
r := &resource{
ID: "/Shared/abc",
Resource: "databricks_notebook",
}
ic.emitWorkspaceObjectParentDirectory(r)
assert.Equal(t, 1, len(ic.testEmits))
assert.True(t, ic.testEmits["databricks_directory[<unknown>] (id: /Shared)"])

dir, exists := r.GetExtraData(ParentDirectoryExtraKey)
assert.True(t, exists)
assert.Equal(t, dirPath, dir)
}

0 comments on commit 160f38e

Please sign in to comment.