From 37f2966b6423b3c2cb98db3fede67adb23293dee Mon Sep 17 00:00:00 2001 From: Mihailo Aleksic Date: Sat, 5 Oct 2024 11:23:22 +0200 Subject: [PATCH] [SPARK-49866][SQL] Improve the error message for describe table with partition columns ### What changes were proposed in this pull request? Provide more user facing error when partition column name can't be found in the table schema. ### Why are the changes needed? There's an issue where partition column sometimes doesn't match any from the table schema. When that happens we throw an assertion error which is not user friendly. Because of that we introduced new `QueryExecutionError` in order to make it more user facing. ### Does this PR introduce _any_ user-facing change? Yes, users will get more user friendly error message. ### Was this patch authored or co-authored using generative AI tooling? No Closes #48338 from mihailoale-db/mihailoale-db/fixdescribepartitioningmessage. Authored-by: Mihailo Aleksic Signed-off-by: Max Gekk --- .../src/main/resources/error/error-conditions.json | 6 ++++++ .../spark/sql/errors/QueryExecutionErrors.scala | 12 ++++++++++++ .../execution/datasources/v2/DescribeTableExec.scala | 10 +++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index eca3587c74071..f4cb34495612a 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3802,6 +3802,12 @@ ], "sqlState" : "428FT" }, + "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : { + "message" : [ + "Partition column not found in schema . Please provide the existing column for partitioning." + ], + "sqlState" : "42000" + }, "PATH_ALREADY_EXISTS" : { "message" : [ "Path already exists. Set mode as \"overwrite\" to overwrite the existing path." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 5e3aa3ef5f6bb..bc6c7681ea1a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE ) ) } + + def partitionColumnNotFoundInTheTableSchemaError( + column: Seq[String], + schema: StructType): SparkRuntimeException = { + new SparkRuntimeException( + errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA", + messageParameters = Map( + "column" -> toSQLId(column), + "schema" -> toSQLType(schema) + ) + ) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 7f7f280d8cdca..7cfd601ef774f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.{ClusterByTransform, IdentityTransform} import org.apache.spark.sql.connector.read.SupportsReportStatistics +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.ArrayImplicits._ @@ -156,9 +157,12 @@ case class DescribeTableExec( .map(_.asInstanceOf[IdentityTransform].ref.fieldNames()) .map { fieldNames => val nestedField = table.schema.findNestedField(fieldNames.toImmutableArraySeq) - assert(nestedField.isDefined, - s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " + - s"in the table schema ${table.schema().catalogString}.") + if (nestedField.isEmpty) { + throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError( + fieldNames.toSeq, + table.schema() + ) + } nestedField.get }.map { case (path, field) => toCatalystRow(