snowflakedb · acristu · Feb 6, 2023 · Feb 10, 2023 · Feb 10, 2023 · Feb 13, 2023
@@ -74,6 +74,13 @@ public class SnowflakeSinkConnectorConfig {
   public static final String ENABLE_SCHEMATIZATION_CONFIG = "snowflake.enable.schematization";
   public static final String ENABLE_SCHEMATIZATION_DEFAULT = "false";
 
+  public static final String SCHEMATIZATION_AUTO_CONFIG = "snowflake.schematization.auto";
+  public static final String SCHEMATIZATION_AUTO_DEFAULT = "true";
+  public static final String SCHEMATIZATION_AUTO_DISPLAY = "Use automatic schema evolution";
+  public static final String SCHEMATIZATION_AUTO_DOC =
+      "If true, use snowflake automatic schema evolution feature. "
+          + "NOTE: you need to grant evolve schema to " + SNOWFLAKE_USER;
+
   // Proxy Info
   private static final String PROXY_INFO = "Proxy Info";
   public static final String JVM_PROXY_HOST = "jvm.proxy.host";

@@ -281,5 +281,5 @@ public interface SnowflakeConnectionService {
    *
    * @param tableName table name
    */
-  void createTableWithOnlyMetadataColumn(String tableName);
+  void createTableWithOnlyMetadataColumn(String tableName, boolean autoSchematization);
 }
@@ -135,7 +135,7 @@ public void createTable(final String tableName) {
   }
 
   @Override
-  public void createTableWithOnlyMetadataColumn(final String tableName) {
+  public void createTableWithOnlyMetadataColumn(final String tableName, final boolean autoSchematization) {
     checkConnection();
     InternalUtils.assertNotEmpty("tableName", tableName);
     String createTableQuery =
@@ -151,17 +151,19 @@ public void createTableWithOnlyMetadataColumn(final String tableName) {
       throw SnowflakeErrors.ERROR_2007.getException(e);
     }
 
-    // Enable schema evolution by default if the table is created by the connector
-    String enableSchemaEvolutionQuery =
-        "alter table identifier(?) set ENABLE_SCHEMA_EVOLUTION = true";
-    try {
-      PreparedStatement stmt = conn.prepareStatement(enableSchemaEvolutionQuery);
-      stmt.setString(1, tableName);
-      stmt.executeQuery();
-    } catch (SQLException e) {
-      // Skip the error given that schema evolution is still under PrPr
-      LOG_WARN_MSG(
-          "Enable schema evolution failed on table: {}, message: {}", tableName, e.getMessage());
+    if (autoSchematization) {
+      // Enable schema evolution by default if the table is created by the connector
+      String enableSchemaEvolutionQuery =
+          "alter table identifier(?) set ENABLE_SCHEMA_EVOLUTION = true";
+      try {
+        PreparedStatement stmt = conn.prepareStatement(enableSchemaEvolutionQuery);
+        stmt.setString(1, tableName);
+        stmt.executeQuery();
+      } catch (SQLException e) {
+        // Skip the error given that schema evolution is still under PrPr
+        LOG_WARN_MSG(
+            "Enable schema evolution failed on table: {}, message: {}", tableName, e.getMessage());
+      }
     }
 
     LOG_INFO_MSG("Created table {} with only RECORD_METADATA column", tableName);

@@ -145,7 +145,7 @@ private static Map<String, String> getSchemaMapFromRecord(SinkRecord record) {
     Schema schema = record.valueSchema();
     if (schema != null) {
       for (Field field : schema.fields()) {
-        schemaMap.put(field.name(), convertToSnowflakeType(field.schema().type()));
+        schemaMap.put(field.name(), convertToSnowflakeType(field.schema().type(), field.schema().name()));
       }
     }
     return schemaMap;
@@ -158,7 +158,7 @@ private static String inferDataTypeFromJsonObject(JsonNode value) {
       // only when the type of the value is unrecognizable for JAVA
       throw SnowflakeErrors.ERROR_5021.getException("class: " + value.getClass());
     }
-    return convertToSnowflakeType(schemaType);
+    return convertToSnowflakeType(schemaType, null);
   }
 
   /** Convert a json node type to kafka data type */
@@ -192,7 +192,7 @@ private static Type convertJsonNodeTypeToKafkaType(JsonNode value) {
   }
 
   /** Convert the kafka data type to Snowflake data type */
-  private static String convertToSnowflakeType(Type kafkaType) {
+  private static String convertToSnowflakeType(Type kafkaType, String semanticType) {
     switch (kafkaType) {
       case INT8:
         return "BYTEINT";
@@ -209,6 +209,9 @@ private static String convertToSnowflakeType(Type kafkaType) {
       case BOOLEAN:
         return "BOOLEAN";
       case STRING:
+        if (semanticType != null && semanticType.equals("io.debezium.data.Json")) {
+          return "VARIANT";
+        }
         return "VARCHAR";
       case BYTES:
         return "BINARY";

@@ -101,6 +101,7 @@ public class SnowflakeSinkServiceV2 implements SnowflakeSinkService {
   private final String streamingIngestClientName;
 
   private boolean enableSchematization;
+  private boolean autoSchematization;
 
   /**
    * Key is formulated in {@link #partitionChannelKey(String, int)} }
@@ -131,6 +132,8 @@ public SnowflakeSinkServiceV2(
 
     this.enableSchematization =
         this.recordService.setAndGetEnableSchematizationFromConfig(this.connectorConfig);
+    this.autoSchematization =
+        this.recordService.setAndGetAutoSchematizationFromConfig(this.connectorConfig);
 
     this.taskId = connectorConfig.getOrDefault(Utils.TASK_ID, "-1");
     this.streamingIngestClientName =
@@ -516,7 +519,7 @@ private void createTableIfNotExists(final String tableName) {
       if (this.enableSchematization) {
         // Always create the table with RECORD_METADATA only and rely on schema evolution to update
         // the schema
-        this.conn.createTableWithOnlyMetadataColumn(tableName);
+        this.conn.createTableWithOnlyMetadataColumn(tableName, this.autoSchematization);
       } else {
         this.conn.createTable(tableName);
       }

@@ -175,6 +175,7 @@ public class TopicPartitionChannel {
 
   // Whether schematization has been enabled.
   private final boolean enableSchematization;
+  private final boolean autoSchematization;
 
   // Whether schema evolution could be done on this channel
   private final boolean enableSchemaEvolution;
@@ -255,11 +256,14 @@ public TopicPartitionChannel(
     /* Schematization related properties */
     this.enableSchematization =
         this.recordService.setAndGetEnableSchematizationFromConfig(sfConnectorConfig);
+    this.autoSchematization =
+        this.recordService.setAndGetAutoSchematizationFromConfig(sfConnectorConfig);
     this.enableSchemaEvolution =
         this.enableSchematization
             && this.conn != null
-            && this.conn.hasSchemaEvolutionPermission(
-                tableName, sfConnectorConfig.get(SNOWFLAKE_ROLE));
+            && (!autoSchematization || 
+              this.conn.hasSchemaEvolutionPermission(
+                tableName, sfConnectorConfig.get(SNOWFLAKE_ROLE)));
   }
 
   /**

@@ -71,6 +71,7 @@ public class RecordService extends EnableLogging {
   static final String HEADERS = "headers";
 
   private boolean enableSchematization = false;
+  private boolean autoSchematization = true;
 
   // For each task, we require a separate instance of SimpleDataFormat, since they are not
   // inherently thread safe
@@ -120,6 +121,24 @@ public boolean setAndGetEnableSchematizationFromConfig(
     return this.enableSchematization;
   }
 
+  /**
+   * Extract autoSchematization from the connector config and set the value for the recordService.
+   *
+   * <p>If the key is absent, the current value is kept. The result is returned for callers.
+   *
+   * @param connectorConfig the connector config map
+   * @return a boolean indicating whether automatic schema evolution is enabled
+   */
+  public boolean setAndGetAutoSchematizationFromConfig(
+      final Map<String, String> connectorConfig) {
+    if (connectorConfig.containsKey(SnowflakeSinkConnectorConfig.SCHEMATIZATION_AUTO_CONFIG)) {
+      this.autoSchematization =
+          Boolean.parseBoolean(
+              connectorConfig.get(SnowflakeSinkConnectorConfig.SCHEMATIZATION_AUTO_CONFIG));
+    }
+    return this.autoSchematization;
+  }
+
   /**
    * Directly set the enableSchematization through param
    *