Skip to content

Commit

Permalink
[Fix] Fix databricks_sql_table treatment of properties (#3925)
Browse files Browse the repository at this point in the history
## Changes
`databricks_sql_table` has been difficult to maintain due to the fact
that Databricks adds table properties and options that users haven't
directly configured. Historically, to handle this, we have tried to
construct properties carefully by tracking which properties are added by
Databricks automatically and removing them. This has two challenges:
1. We don't know ahead of time which properties are set by Databricks,
so we don't know which properties we need to remove to compute an
accurate diff.
2. Users are not able to specify different values for those default
properties: they are hidden from the `serializeProperties()` and
`serializeOptions()` functions in the resource implementation.

This PR takes a different approach for handling these fields by storing
the properties/options returned by the API in `effective_properties` and
`effective_options` fields, respectively. These fields are computed and
non-user-settable, so users cannot override them. When creating/updating
the table, we only include the properties specified by the user.
However, when determining whether there is a diff, we compare the effective
properties fetched during `terraform plan` (which refreshes the table
state) with the user-specified properties. If the user specified
any properties that are not part of the effective properties/options, or
if a user-specified value differs from the value returned by the API, this
triggers a diff. Conversely, a change in a property the user never
specified does not trigger a diff.

As a side-effect, this PR also fixes the treatment of `TBLPROPERTIES`
and `OPTIONS` as reported in #3894. It also makes the `properties` field
not computed, fixing #3652. It also allows users to override properties
that are otherwise set by default, fixing #2800.

Fixes #3894.
Fixes #3652. 
Fixes #3378. 
Fixes #2800.
## Tests
Unit tests to verify the diff behavior with this change.
Unit test to ensure that TBLPROPERTIES is correctly set.

- [ ] `make test` run locally
- [ ] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK
  • Loading branch information
mgyucht authored Aug 23, 2024
1 parent 3cb6ea0 commit 5603eb6
Show file tree
Hide file tree
Showing 3 changed files with 313 additions and 101 deletions.
162 changes: 61 additions & 101 deletions catalog/resource_sql_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,44 @@ type SqlTableInfo struct {
StorageCredentialName string `json:"storage_credential_name,omitempty" tf:"force_new"`
ViewDefinition string `json:"view_definition,omitempty"`
Comment string `json:"comment,omitempty"`
Properties map[string]string `json:"properties,omitempty" tf:"computed"`
Properties map[string]string `json:"properties,omitempty"`
Options map[string]string `json:"options,omitempty" tf:"force_new"`
ClusterID string `json:"cluster_id,omitempty" tf:"computed"`
WarehouseID string `json:"warehouse_id,omitempty"`
Owner string `json:"owner,omitempty" tf:"computed"`
// EffectiveProperties includes both properties and options. Options are prefixed with `option.`.
EffectiveProperties map[string]string `json:"effective_properties" tf:"computed"`
ClusterID string `json:"cluster_id,omitempty" tf:"computed"`
WarehouseID string `json:"warehouse_id,omitempty"`
Owner string `json:"owner,omitempty" tf:"computed"`

exec common.CommandExecutor
sqlExec sql.StatementExecutionInterface
}

// CustomizeSchema tweaks the generated Terraform schema for databricks_sql_table:
// it suppresses spurious diffs on identifier, format, location and view fields,
// and declares mutually exclusive attribute pairs.
func (ti SqlTableInfo) CustomizeSchema(s *common.CustomizableSchema) *common.CustomizableSchema {

	// Identifier fields compare case-insensitively.
	caseInsensitiveFields := []string{"name", "catalog_name", "schema_name"}
	for _, field := range caseInsensitiveFields {
		s.SchemaPath(field).SetCustomSuppressDiff(common.EqualFoldDiffSuppress)
	}
	s.SchemaPath("data_source_format").SetCustomSuppressDiff(func(k, old, new string, d *schema.ResourceData) bool {
		// An unset format in config never produces a diff.
		if new == "" {
			return true
		}
		// EqualFold already compares case-insensitively; lowercasing first is redundant.
		return strings.EqualFold(old, new)
	})
	s.SchemaPath("storage_location").SetCustomSuppressDiff(ucDirectoryPathSlashAndEmptySuppressDiff)
	s.SchemaPath("view_definition").SetCustomSuppressDiff(common.SuppressDiffWhitespaceChange)

	// A table is driven by either a cluster or a SQL warehouse, never both.
	s.SchemaPath("cluster_id").SetConflictsWith([]string{"warehouse_id"})
	s.SchemaPath("warehouse_id").SetConflictsWith([]string{"cluster_id"})

	// Partitioning and cluster keys are mutually exclusive ways to lay out data.
	s.SchemaPath("partitions").SetConflictsWith([]string{"cluster_keys"})
	s.SchemaPath("cluster_keys").SetConflictsWith([]string{"partitions"})
	// Column types are considered equal if they normalize to the same type.
	s.SchemaPath("column", "type").SetCustomSuppressDiff(func(k, old, new string, d *schema.ResourceData) bool {
		return getColumnType(old) == getColumnType(new)
	})
	return s
}

type SqlTablesAPI struct {
client *common.DatabricksClient
context context.Context
Expand All @@ -62,6 +90,9 @@ func NewSqlTablesAPI(ctx context.Context, m any) SqlTablesAPI {

// getTable fetches a single table from the Unity Catalog API. The properties
// returned by the API are moved into the read-only EffectiveProperties
// attribute so that the user-settable Properties field only ever reflects
// the user's own configuration.
func (a SqlTablesAPI) getTable(name string) (ti SqlTableInfo, err error) {
	err = a.client.Get(a.context, "/unity-catalog/tables/"+name, nil, &ti)
	// Move API-returned properties & options to the read-only attribute.
	ti.EffectiveProperties, ti.Properties = ti.Properties, nil
	return
}

Expand All @@ -77,54 +108,6 @@ func parseComment(s string) string {
return strings.ReplaceAll(strings.ReplaceAll(s, `\'`, `'`), `'`, `\'`)
}

// These properties are added automatically
// If we do not customize the diff using these then terraform will constantly try to remove them
// `properties` is essentially a "partially" computed field
// This needs to be replaced with something a bit more robust in the future
// These properties are added automatically by Databricks.
// If we do not customize the diff using these then terraform will constantly try to remove them.
// `properties` is essentially a "partially" computed field.
// This needs to be replaced with something a bit more robust in the future.
//
// sqlTableIsManagedProperty reports whether key names a property that
// Databricks manages automatically and that should therefore be ignored
// when diffing user-specified properties.
func sqlTableIsManagedProperty(key string) bool {
	// A switch over constant keys avoids rebuilding a lookup map on every call.
	switch key {
	case
		// Property set if the table uses `cluster_keys`.
		"clusteringColumns",

		"delta.lastCommitTimestamp",
		"delta.lastUpdateVersion",
		"delta.minReaderVersion",
		"delta.minWriterVersion",
		"delta.columnMapping.maxColumnId",
		"delta.enableDeletionVectors",
		"delta.enableRowTracking",
		"delta.feature.clustering",
		"delta.feature.changeDataFeed",
		"delta.feature.deletionVectors",
		"delta.feature.domainMetadata",
		"delta.feature.liquid",
		"delta.feature.rowTracking",
		"delta.feature.v2Checkpoint",
		"delta.feature.timestampNtz",
		"delta.liquid.clusteringColumns",
		"delta.rowTracking.materializedRowCommitVersionColumnName",
		"delta.rowTracking.materializedRowIdColumnName",
		"delta.checkpoint.writeStatsAsJson",
		"delta.checkpoint.writeStatsAsStruct",
		"delta.checkpointPolicy",
		"view.catalogAndNamespace.numParts",
		"view.catalogAndNamespace.part.0",
		"view.catalogAndNamespace.part.1",
		"view.query.out.col.0",
		"view.query.out.numCols",
		"view.referredTempFunctionsNames",
		"view.referredTempViewNames",
		"view.sqlConfig.spark.sql.hive.convertCTAS",
		"view.sqlConfig.spark.sql.legacy.createHiveTableByDefault",
		"view.sqlConfig.spark.sql.parquet.compression.codec",
		"view.sqlConfig.spark.sql.session.timeZone",
		"view.sqlConfig.spark.sql.sources.commitProtocolClass",
		"view.sqlConfig.spark.sql.sources.default",
		"view.sqlConfig.spark.sql.streaming.stopTimeout":
		return true
	}
	return false
}

func (ti *SqlTableInfo) initCluster(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) (err error) {
defaultClusterName := "terraform-sql-table"
clustersAPI := clusters.NewClustersAPI(ctx, c)
Expand Down Expand Up @@ -212,19 +195,15 @@ func (ti *SqlTableInfo) serializeColumnInfos() string {
// serializeProperties renders the user-specified table properties as a
// comma-separated list of 'key'='value' pairs, suitable for a TBLPROPERTIES
// clause. Every user-specified property is included; nothing is filtered out.
func (ti *SqlTableInfo) serializeProperties() string {
	propsMap := make([]string, 0, len(ti.Properties))
	for key, value := range ti.Properties {
		propsMap = append(propsMap, fmt.Sprintf("'%s'='%s'", key, value))
	}
	return strings.Join(propsMap, ", ") // 'foo'='bar', 'this'='that'
}

// serializeOptions renders the user-specified table options as a
// comma-separated list of 'key'='value' pairs, suitable for an OPTIONS
// clause. Every user-specified option is included; nothing is filtered out.
func (ti *SqlTableInfo) serializeOptions() string {
	optionsMap := make([]string, 0, len(ti.Options))
	for key, value := range ti.Options {
		optionsMap = append(optionsMap, fmt.Sprintf("'%s'='%s'", key, value))
	}
	return strings.Join(optionsMap, ", ") // 'foo'='bar', 'this'='that'
}
Expand Down Expand Up @@ -549,31 +528,7 @@ func assertNoColumnMembershipAndFieldValueUpdate(oldCols []interface{}, newColum
}

func ResourceSqlTable() common.Resource {
tableSchema := common.StructToSchema(SqlTableInfo{},
func(s map[string]*schema.Schema) map[string]*schema.Schema {
caseInsensitiveFields := []string{"name", "catalog_name", "schema_name"}
for _, field := range caseInsensitiveFields {
s[field].DiffSuppressFunc = common.EqualFoldDiffSuppress
}
s["data_source_format"].DiffSuppressFunc = func(k, old, new string, d *schema.ResourceData) bool {
if new == "" {
return true
}
return strings.EqualFold(strings.ToLower(old), strings.ToLower(new))
}
s["storage_location"].DiffSuppressFunc = ucDirectoryPathSlashAndEmptySuppressDiff
s["view_definition"].DiffSuppressFunc = common.SuppressDiffWhitespaceChange

s["cluster_id"].ConflictsWith = []string{"warehouse_id"}
s["warehouse_id"].ConflictsWith = []string{"cluster_id"}

s["partitions"].ConflictsWith = []string{"cluster_keys"}
s["cluster_keys"].ConflictsWith = []string{"partitions"}
common.MustSchemaPath(s, "column", "type").DiffSuppressFunc = func(k, old, new string, d *schema.ResourceData) bool {
return getColumnType(old) == getColumnType(new)
}
return s
})
tableSchema := common.StructToSchema(SqlTableInfo{}, nil)
return common.Resource{
Schema: tableSchema,
CustomizeDiff: func(ctx context.Context, d *schema.ResourceDiff) error {
Expand All @@ -585,25 +540,30 @@ func ResourceSqlTable() common.Resource {
return err
}
}
if d.HasChange("properties") {
old, new := d.GetChange("properties")
oldProps := old.(map[string]any)
newProps := new.(map[string]any)
old, _ = d.GetChange("options")
options := old.(map[string]any)
for key := range oldProps {
if _, ok := newProps[key]; !ok {
//options also gets exposed as properties
if _, ok := options[key]; ok {
newProps[key] = oldProps[key]
}
//some options are exposed as option.[...] properties
if sqlTableIsManagedProperty(key) || strings.HasPrefix(key, "option.") {
newProps[key] = oldProps[key]
}
}
// Compute the new effective property/options.
// If the user changed a property or option, the resource will already be considered changed.
// If the user specified a property but the value of that property has changed, that will appear
// as a change in the effective property/option. To cause a diff to be detected, we need to
// reset the effective property/option to the requested value.
userSpecifiedProperties := d.Get("properties").(map[string]interface{})
userSpecifiedOptions := d.Get("options").(map[string]interface{})
effectiveProperties := d.Get("effective_properties").(map[string]interface{})
diff := make(map[string]interface{})
for k, userSpecifiedValue := range userSpecifiedProperties {
if effectiveValue, ok := effectiveProperties[k]; !ok || effectiveValue != userSpecifiedValue {
diff[k] = userSpecifiedValue
}
}
for k, userSpecifiedValue := range userSpecifiedOptions {
if effectiveValue, ok := effectiveProperties["option."+k]; !ok || effectiveValue != userSpecifiedValue {
diff["option."+k] = userSpecifiedValue
}
}
if len(diff) > 0 {
for k, v := range diff {
effectiveProperties[k] = v
}
d.SetNew("properties", newProps)
d.SetNew("effective_properties", effectiveProperties)
}
// No support yet for changing the COMMENT on a VIEW
// Once added this can be removed
Expand Down
Loading

0 comments on commit 5603eb6

Please sign in to comment.