From 37f2966b6423b3c2cb98db3fede67adb23293dee Mon Sep 17 00:00:00 2001
From: Mihailo Aleksic <mihailo.aleksic@databricks.com>
Date: Sat, 5 Oct 2024 11:23:22 +0200
Subject: [PATCH] [SPARK-49866][SQL] Improve the error message for describe
 table with partition columns

### What changes were proposed in this pull request?
Provide more user facing error when partition column name can't be found in the table schema.

### Why are the changes needed?
There's an issue where partition column sometimes doesn't match any from the table schema. When that happens we throw an assertion error which is not user friendly. Because of that we introduced new `QueryExecutionError` in order to make it more user facing.

### Does this PR introduce _any_ user-facing change?
Yes, users will get more user friendly error message.

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #48338 from mihailoale-db/mihailoale-db/fixdescribepartitioningmessage.

Authored-by: Mihailo Aleksic <mihailo.aleksic@databricks.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
---
 .../src/main/resources/error/error-conditions.json   |  6 ++++++
 .../spark/sql/errors/QueryExecutionErrors.scala      | 12 ++++++++++++
 .../execution/datasources/v2/DescribeTableExec.scala | 10 +++++++---
 3 files changed, 25 insertions(+), 3 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index eca3587c74071..f4cb34495612a 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3802,6 +3802,12 @@
     ],
     "sqlState" : "428FT"
   },
+  "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : {
+    "message" : [
+      "Partition column <column> not found in schema <schema>. Please provide the existing column for partitioning."
+    ],
+    "sqlState" : "42000"
+  },
   "PATH_ALREADY_EXISTS" : {
     "message" : [
       "Path <outputPath> already exists. Set mode as \"overwrite\" to overwrite the existing path."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 5e3aa3ef5f6bb..bc6c7681ea1a5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       )
     )
   }
+
+  def partitionColumnNotFoundInTheTableSchemaError(
+      column: Seq[String],
+      schema: StructType): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",
+      messageParameters = Map(
+        "column" -> toSQLId(column),
+        "schema" -> toSQLType(schema)
+      )
+    )
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 7f7f280d8cdca..7cfd601ef774f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.{ClusterByTransform, IdentityTransform}
 import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
 
@@ -156,9 +157,12 @@ case class DescribeTableExec(
           .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
           .map { fieldNames =>
             val nestedField = table.schema.findNestedField(fieldNames.toImmutableArraySeq)
-            assert(nestedField.isDefined,
-              s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
-              s"in the table schema ${table.schema().catalogString}.")
+            if (nestedField.isEmpty) {
+              throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+                fieldNames.toSeq,
+                table.schema()
+              )
+            }
             nestedField.get
           }.map { case (path, field) =>
             toCatalystRow(